
Commit 7f28502

Merge pull request #914 from calad0i/HGQ-integration
Add support for HGQ proxy model
2 parents eb68da5 + 2f10431

15 files changed: +674 -8 lines changed


.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@ generator:
   stage: generate
   image: python:3.8-alpine
   variables:
-    N_TESTS_PER_YAML: 5
+    N_TESTS_PER_YAML: 4
   tags:
     - k8s-default
   before_script:
Lines changed: 107 additions & 0 deletions

@@ -0,0 +1,107 @@
import numpy as np

from hls4ml.backends import Backend
from hls4ml.backends.template import FunctionCallTemplate
from hls4ml.model.layers import Layer
from hls4ml.model.optimizer import OptimizerPass
from hls4ml.model.optimizer.passes.hgq_proxy_model import FixedPointQuantizer, UnaryLUT
from hls4ml.model.types import Source


def to_apfixed(k, b, i, RND, SAT):
    u = 'u' if k == 0 else ''
    return f'ap_{u}fixed<{b},{i},AP_{RND},AP_{SAT}>'


def to_acfixed(k, b, i, RND, SAT):
    k = 'false' if k == 0 else 'true'
    return f'ac_fixed<{b},{i},{k},AC_{RND},AC_{SAT}>'


def generate_mask_fn(
    name: str, shape: tuple[int, ...], k: np.ndarray, b: np.ndarray, i: np.ndarray, RND: str, SAT: str, backend: str
) -> str:
    """Generate heterogenous quantization mask function, ONLY works for IOType=io_parallel"""
    assert k.shape[0] == b.shape[0] == i.shape[0] == 1
    assert backend.lower() in ('quartus', 'vivado', 'vitis'), f'Backend {backend} not tested'
    Ks, Bs, Is = k[0], b[0], i[0]
    Ks, Bs, Is = np.broadcast_to(Ks, shape), np.broadcast_to(Bs, shape), np.broadcast_to(Is, shape)
    Ks, Bs, Is = Ks.ravel(), Bs.ravel(), Is.ravel()
    masks = []
    to_fixed = to_acfixed if backend.lower() == 'quartus' else to_apfixed
    for idx, (k, b, i) in enumerate(zip(Ks, Bs, Is)):
        if b == 0:
            fn = f'out[{idx}] = 0;'
        else:
            fn = f'out[{idx}] = {to_fixed(k, b, i, RND, SAT)}(inp[{idx}]);'
        masks.append(f' {fn}')
    body = "\n".join(masks)
    mask_fn = f'''
template<typename input_t, typename output_t>
void {name}(input_t *inp, output_t *out) {{
    #pragma HLS INLINE

{body}
}}
'''
    return mask_fn


class ProcessFixedPointQuantizerLayer(OptimizerPass):
    def match(self, node: Layer):
        return isinstance(node, FixedPointQuantizer)

    def transform(self, model, node: FixedPointQuantizer):
        if node.fusible:
            model.remove_node(node, rewire=True)
            return True

        if model.config.config['IOType'] != 'io_parallel':
            raise NotImplementedError('Heterogenous quantization for activations is only supported with IOType=io_parallel')

        backend = model.config.config['Backend']

        name = node.name

        assert node.mask_kbi is not None
        k, b, i = node.mask_kbi
        RND = node.RND
        SAT = node.SAT
        mask_fn: str = generate_mask_fn(name, node.get_input_variable().shape, k, b, i, RND, SAT, backend)

        node.set_attr('mask_fn_codegen', Source(mask_fn))


class ProcessFixedPointQuantizerCall(FunctionCallTemplate):
    def __init__(self):
        super().__init__(FixedPointQuantizer, include_header=[])
        self.template = 'nnet::{name}<{input_t}, {output_t}>({input}, {output});'

    def format(self, node):
        params = self._default_function_params(node)

        return self.template.format(**params)


class ProcessUnaryLUTCall(FunctionCallTemplate):
    def __init__(self):
        super().__init__(UnaryLUT, include_header=[])
        self.template = 'nnet::unary_lut<{input_t}, {output_t}, {config}>({input}, {output}, {table});'
        self.include_header = [
            'nnet_utils/nnet_activation.h',
            'nnet_utils/nnet_activation_stream.h',
        ]

    def format(self, node):
        params = self._default_function_params(node)
        node.attributes['result_t'].precision = node.attributes['table_t'].precision
        params['config'] = f'unary_lut_config{node.index}'
        params['table'] = node.get_weights('table').name

        return self.template.format(**params)


def register_hgq_proxy_model(backend: Backend):
    backend.register_pass('process_fixed_point_quantizer_layer', ProcessFixedPointQuantizerLayer)
    backend.register_template(ProcessFixedPointQuantizerCall)
    backend.register_template(ProcessUnaryLUTCall)
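As an illustration of what this pass generates, the snippet below calls generate_mask_fn from the listing above with made-up per-element quantization parameters; the function name 'q_mask' and the rounding/saturation modes are invented for this sketch, and the expected output is shown in the trailing comment.

import numpy as np

# Hypothetical parameters for a 2-element tensor: element 0 is pruned away
# (zero bits), element 1 keeps a signed value with 6 total bits, 3 integer bits.
k = np.array([[1, 1]])  # keep_negative (sign) flags
b = np.array([[0, 6]])  # total bit widths
i = np.array([[0, 3]])  # integer bits

# Assumes generate_mask_fn from the new pass file above is in scope.
print(generate_mask_fn('q_mask', (2,), k, b, i, 'RND', 'SAT', 'vivado'))

# Roughly:
#
# template<typename input_t, typename output_t>
# void q_mask(input_t *inp, output_t *out) {
#     #pragma HLS INLINE
#
#  out[0] = 0;
#  out[1] = ap_fixed<6,3,AP_RND,AP_SAT>(inp[1]);
# }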

hls4ml/backends/quartus/passes/core_templates.py

Lines changed: 2 additions & 1 deletion

@@ -1,6 +1,7 @@
 from hls4ml.backends.backend import get_backend
 from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate
 from hls4ml.model.layers import Activation, BatchNormalization, Dense, HardActivation, ParametrizedActivation, PReLU, Softmax
+from hls4ml.model.optimizer.passes.hgq_proxy_model import UnaryLUT
 
 # Dense templates
 
@@ -152,7 +153,7 @@ def format(self, node):
 
 class ActivationConfigTemplate(LayerConfigTemplate):
     def __init__(self):
-        super().__init__((Activation, ParametrizedActivation, PReLU))
+        super().__init__((Activation, ParametrizedActivation, PReLU, UnaryLUT))
         self.template = activ_config_template
 
     def format(self, node):

hls4ml/backends/quartus/quartus_backend.py

Lines changed: 24 additions & 2 deletions

@@ -1,5 +1,6 @@
 import os
 from contextlib import contextmanager
+from warnings import warn
 
 import numpy as np
 
@@ -73,6 +74,7 @@ def _register_flows(self):
            'quartus:inplace_stream_flatten',
            'quartus:skip_softmax',
            'quartus:fix_softmax_table_size',
+           'quartus:process_fixed_point_quantizer_layer',
            'infer_precision_types',
        ]
        optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)
@@ -265,7 +267,17 @@ def init_conv1d(self, layer):
        n_in, n_out = self.get_layer_mult_size(layer)
        self.set_target_reuse_factor(layer)
        self.set_closest_reuse_factor(layer, n_in, n_out)
-       layer.set_attr('parallelization', layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1))
+
+       # Not overriding user parallelization factor, if already set and user has not specified a value
+       user_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', None)
+       layer_pf = layer.get_attr('parallelization_factor', None)
+       chosen_pf = user_pf or layer_pf or 1
+       if user_pf is not None and layer_pf is not None:
+           if user_pf != layer_pf:
+               warn(
+                   f'For layer {layer.name}, parallelization factor of {layer_pf} is defined in the proxy-model, but is overridden by the user to {user_pf}.'  # noqa: E501
+               )
+       layer.set_attr('parallelization', chosen_pf)
 
        # impl_filt_width determines the filter size post-Winograd transformation
        layer.set_attr('impl_filt_width', layer.get_attr('filt_width'))
@@ -295,7 +307,17 @@ def init_conv2d(self, layer):
        n_in, n_out = self.get_layer_mult_size(layer)
        self.set_target_reuse_factor(layer)
        self.set_closest_reuse_factor(layer, n_in, n_out)
-       layer.set_attr('parallelization', layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1))
+
+       # Not overriding user parallelization factor, if already set and user has not specified a value
+       user_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', None)
+       layer_pf = layer.get_attr('parallelization_factor', None)
+       chosen_pf = user_pf or layer_pf or 1
+       if user_pf is not None and layer_pf is not None:
+           if user_pf != layer_pf:
+               warn(
+                   f'For layer {layer.name}, parallelization factor of {layer_pf} is defined in the proxy-model, but is overridden by the user to {user_pf}.'  # noqa: E501
+               )
+       layer.set_attr('parallelization', chosen_pf)
 
        # impl_filt_width & impl_filt_height determine the filter size post-Winograd transformation
        layer.set_attr('impl_filt_height', layer.get_attr('filt_height'))
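The parallelization-factor precedence added to both init_conv1d and init_conv2d can be read in isolation; the helper below is not part of the commit, just a minimal sketch of the same rule (user config wins over the proxy-model attribute, default of 1, warn on conflict).

from warnings import warn


def resolve_parallelization_factor(user_pf, layer_pf):
    # Illustrative only: mirrors the chosen_pf logic in the diff above.
    chosen_pf = user_pf or layer_pf or 1
    if user_pf is not None and layer_pf is not None and user_pf != layer_pf:
        warn(f'Proxy-model parallelization factor {layer_pf} is overridden by the user value {user_pf}.')
    return chosen_pf


assert resolve_parallelization_factor(None, None) == 1  # neither set: default
assert resolve_parallelization_factor(None, 4) == 4     # proxy-model value is used
assert resolve_parallelization_factor(8, 4) == 8        # user config wins (and a warning is emitted)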

hls4ml/backends/vivado/passes/core_templates.py

Lines changed: 2 additions & 1 deletion

@@ -1,6 +1,7 @@
 from hls4ml.backends.backend import get_backend
 from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate
 from hls4ml.model.layers import Activation, BatchNormalization, Dense, HardActivation, ParametrizedActivation, PReLU, Softmax
+from hls4ml.model.optimizer.passes.hgq_proxy_model import UnaryLUT
 
 # Dense templates
 
@@ -144,7 +145,7 @@ def format(self, node):
 
 class ActivationConfigTemplate(LayerConfigTemplate):
     def __init__(self):
-        super().__init__((Activation, ParametrizedActivation, PReLU))
+        super().__init__((Activation, ParametrizedActivation, PReLU, UnaryLUT))
         self.template = activ_config_template
 
     def format(self, node):

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 26 additions & 2 deletions

@@ -1,5 +1,6 @@
 import os
 import sys
+from warnings import warn
 
 import numpy as np
 
@@ -107,6 +108,7 @@ def _register_flows(self):
            'vivado:inplace_stream_flatten',
            'vivado:skip_softmax',
            'vivado:fix_softmax_table_size',
+           'vivado:process_fixed_point_quantizer_layer',
            'infer_precision_types',
        ]
        optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)
@@ -295,7 +297,17 @@ def init_conv1d(self, layer):
            layer.set_attr('strategy', 'latency')
 
        out_width = layer.get_output_variable().shape[0]
-       chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+
+       # Not overriding user parallelization factor, if already set and user has not specified a value
+       user_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', None)
+       layer_pf = layer.get_attr('parallelization_factor', None)
+       chosen_pf = user_pf or layer_pf or 1
+       if user_pf is not None and layer_pf is not None:
+           if user_pf != layer_pf:
+               warn(
+                   f'For layer {layer.name}, parallelization factor of {layer_pf} is defined in the proxy-model, but is overridden by the user to {user_pf}.'  # noqa: E501
+               )
+
        valid_pf = self.get_valid_conv_partition_splits(1, out_width)
        if chosen_pf not in valid_pf:
            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
@@ -307,6 +319,7 @@ def init_conv1d(self, layer):
        else:
            closest_pf = chosen_pf
        layer.set_attr('n_partitions', out_width // closest_pf)
+       layer.set_attr('parallelization_factor', closest_pf)
 
        layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())
 
@@ -361,7 +374,17 @@ def init_conv2d(self, layer):
 
        out_height = layer.get_output_variable().shape[0]
        out_width = layer.get_output_variable().shape[1]
-       chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+
+       # Not overriding user parallelization factor, if already set and user has not specified a value
+       user_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', None)
+       layer_pf = layer.get_attr('parallelization_factor', None)
+       chosen_pf = user_pf or layer_pf or 1
+       if user_pf is not None and layer_pf is not None:
+           if user_pf != layer_pf:
+               warn(
+                   f'For layer {layer.name}, parallelization factor of {layer_pf} is defined in the proxy-model, but is overridden by the user to {user_pf}.'  # noqa: E501
+               )
+
        valid_pf = self.get_valid_conv_partition_splits(out_height, out_width)
        if chosen_pf not in valid_pf:
            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
@@ -373,6 +396,7 @@ def init_conv2d(self, layer):
        else:
            closest_pf = chosen_pf
        layer.set_attr('n_partitions', out_height * out_width // closest_pf)
+       layer.set_attr('parallelization_factor', closest_pf)
 
        layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())
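On the user side, the user_pf read above comes from the per-layer hls4ml configuration. A hedged sketch of setting it explicitly follows; the layer name 'conv1' and the model object are placeholders, and the rest uses the standard hls4ml configuration API.

import hls4ml

# 'model' is a placeholder for an already-built Keras model.
config = hls4ml.utils.config_from_keras_model(model, granularity='name')

# An explicit per-layer value; with the change above it takes precedence over any
# parallelization_factor carried over from an HGQ proxy model (a warning is emitted on conflict).
config['LayerName']['conv1']['ParallelizationFactor'] = 4

hls_model = hls4ml.converters.convert_from_keras_model(
    model, hls_config=config, backend='Vivado', io_type='io_parallel'
)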

Lines changed: 37 additions & 0 deletions

@@ -0,0 +1,37 @@
from hls4ml.converters.keras_to_hls import KerasReader, keras_handler, parse_default_keras_layer


@keras_handler('FixedPointQuantizer', 'HGQ>FixedPointQuantizer')
def fixedpoint_quantizer_handler(keras_layer, input_names, input_shapes, data_reader: KerasReader):
    config = parse_default_keras_layer(keras_layer, input_names)

    name = config['name']
    fusible = keras_layer['config']['fusible']
    config['RND'] = keras_layer['config']['RND']
    config['SAT'] = keras_layer['config']['SAT']
    config['fusible'] = fusible
    if not fusible:
        k = data_reader.get_weights_data(name, 'keep_negative')
        b = data_reader.get_weights_data(name, 'bits')
        i = data_reader.get_weights_data(name, 'integers')
        config['mask_kbi'] = k, b, i
    config['overrides'] = keras_layer['config']['overrides']

    layer = config
    return layer, input_shapes[0]


@keras_handler('UnaryLUT', 'HGQ>UnaryLUT')
def unary_lut_keras_handler(keras_layer, input_names, input_shapes, data_reader: KerasReader):
    config = parse_default_keras_layer(keras_layer, input_names)

    table = data_reader.get_weights_data(config['name'], 'table')
    k, i, f = keras_layer['config']['kif_out']
    k, b, i = k, k + i + f, k + i
    config['table_t'] = f'{"" if k else "u"}fixed<{b},{i}>'
    config['table'] = table
    config['table_size'] = len(table)
    config['activation'] = 'unary_lut'

    layer = config
    return layer, input_shapes[0]
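For orientation, here is a hedged sketch of the kind of serialized layer entry these handlers consume; every value below is invented, and the comments summarize what the handlers above copy into the hls4ml layer config.

# Hypothetical 'HGQ>FixedPointQuantizer' entry from a saved model's architecture JSON.
keras_layer = {
    'class_name': 'HGQ>FixedPointQuantizer',
    'config': {
        'name': 'fixed_point_quantizer',
        'fusible': False,      # not fusible -> per-element k/b/i masks are read from the layer weights
        'RND': 'RND',          # rounding mode, used as AP_RND / AC_RND in the generated cast
        'SAT': 'SAT',          # saturation mode, used as AP_SAT / AC_SAT
        'overrides': {'layers': {}},
    },
}
# fixedpoint_quantizer_handler forwards RND, SAT, fusible and overrides, and for
# non-fusible quantizers attaches mask_kbi = (keep_negative, bits, integers).
#
# For unary_lut_keras_handler, kif_out -> table_t works out as, e.g.,
# kif_out = (1, 3, 4)  =>  k, b, i = 1, 1 + 3 + 4, 1 + 3  =>  table_t = 'fixed<8,4>'.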

hls4ml/converters/keras_to_hls.py

Lines changed: 2 additions & 0 deletions

@@ -205,6 +205,8 @@ def parse_keras_model(model_arch, reader):
         'Softmax',
         'TernaryTanh',
         'HardActivation',
+        'UnaryLUT',
+        'HGQ>UnaryLUT',
     ]
     # Recurrent layers
     recurrent_layers = ['SimpleRNN', 'LSTM', 'GRU']

hls4ml/model/optimizer/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@
         'extract_ternary_threshold',
         'fuse_consecutive_batch_normalization',
         'replace_multidimensional_dense_with_conv',
+        'enforce_proxy_model_embedded_config',
     ],
 )  # TODO Maybe not all QKeras optmizers belong here?