Commit c738b7f

🎉 add support for HGQ-proxy-model
more consistent type naming
revert example model version
1 parent 5c0c4e6 commit c738b7f

8 files changed, +706 -4 lines changed

Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
import numpy as np

from hls4ml.backends import Backend
from hls4ml.backends.template import FunctionCallTemplate
from hls4ml.model.layers import Layer
from hls4ml.model.optimizer import OptimizerPass
from hls4ml.model.optimizer.passes.hgq_proxy_model import FixedPointQuantizer
from hls4ml.model.types import Source


def to_apfixed(k, b, i, RND, SAT):
    u = 'u' if k == 0 else ''
    return f'ap_{u}fixed<{b},{i},AP_{RND},AP_{SAT}>'


def to_acfixed(k, b, i, RND, SAT):
    k = 'false' if k == 0 else 'true'
    return f'ac_fixed<{b},{i},{k},AC_{RND},AC_{SAT}>'


def generate_mask_fn(
    name: str, shape: tuple[int, ...], k: np.ndarray, b: np.ndarray, i: np.ndarray, RND: str, SAT: str, backend: str
) -> str:
    """Generate heterogeneous quantization mask function, ONLY works for IOType=io_parallel"""
    assert k.shape[0] == b.shape[0] == i.shape[0] == 1
    assert backend.lower() in ('quartus', 'vivado', 'vitis'), f'Backend {backend} not tested'
    Ks, Bs, Is = k[0], b[0], i[0]
    Ks, Bs, Is = np.broadcast_to(Ks, shape), np.broadcast_to(Bs, shape), np.broadcast_to(Is, shape)
    Ks, Bs, Is = Ks.ravel(), Bs.ravel(), Is.ravel()
    masks = []
    to_fixed = to_acfixed if backend.lower() == 'quartus' else to_apfixed
    for idx, (k, b, i) in enumerate(zip(Ks, Bs, Is)):
        if b == 0:
            fn = f'out[{idx}] = 0;'
        else:
            fn = f'out[{idx}] = {to_fixed(k,b,i,RND,SAT)}(inp[{idx}]);'
        masks.append(f'    {fn}')
    body = "\n".join(masks)
    mask_fn = f'''
template<typename input_t, typename output_t>
void {name}(input_t *inp, output_t *out) {{
    #pragma HLS INLINE
    #pragma HLS PIPELINE

{body}
}}
'''
    return mask_fn


class ProcessFixedPointQuantizerLayer(OptimizerPass):
    def match(self, node: Layer):
        return isinstance(node, FixedPointQuantizer)

    def transform(self, model, node: FixedPointQuantizer):
        if node.fusible:
            model.remove_node(node, rewire=True)
            return True

        if model.config.config['IOType'] != 'io_parallel':
            raise NotImplementedError('Heterogeneous quantization for activations is only supported with IOType=io_parallel')

        backend = model.config.config['Backend']

        name = node.name

        assert node.mask_kbi is not None
        k, b, i = node.mask_kbi
        RND = node.RND
        SAT = node.SAT
        mask_fn: str = generate_mask_fn(name, node.get_input_variable().shape, k, b, i, RND, SAT, backend)

        node.set_attr('mask_fn_codegen', Source(mask_fn))


class ProcessFixedPointQuantizerCall(FunctionCallTemplate):
    def __init__(self):
        super().__init__(FixedPointQuantizer, include_header=[])
        self.template = 'nnet::{name}<{input_t}, {output_t}>({input}, {output});'

    def format(self, node):
        params = self._default_function_params(node)

        return self.template.format(**params)


def register_hgq_proxy_model(backend: Backend):
    backend.register_pass('process_fixed_point_quantizer_layer', ProcessFixedPointQuantizerLayer)
    backend.register_template(ProcessFixedPointQuantizerCall)
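
As a quick orientation aid (not part of the commit), here is a rough sketch of what generate_mask_fn is expected to emit for a toy three-element tensor on the Vivado backend; the function name 'q_mask' and the k/b/i values are invented for illustration:

import numpy as np

# Hypothetical per-element quantization: element 0 is a signed 8-bit value with 3
# integer bits, element 1 an unsigned 6-bit value with 2 integer bits, and element 2
# has zero bits, so it is masked to a constant 0.
k = np.array([[1, 0, 0]])
b = np.array([[8, 6, 0]])
i = np.array([[3, 2, 0]])
print(generate_mask_fn('q_mask', (3,), k, b, i, 'RND', 'SAT', 'vivado'))
# Roughly expected output:
# template<typename input_t, typename output_t>
# void q_mask(input_t *inp, output_t *out) {
#     #pragma HLS INLINE
#     #pragma HLS PIPELINE
#
#     out[0] = ap_fixed<8,3,AP_RND,AP_SAT>(inp[0]);
#     out[1] = ap_ufixed<6,2,AP_RND,AP_SAT>(inp[1]);
#     out[2] = 0;
# }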

hls4ml/backends/quartus/quartus_backend.py

Lines changed: 24 additions & 2 deletions
@@ -1,5 +1,6 @@
 import os
 from contextlib import contextmanager
+from warnings import warn

 import numpy as np

@@ -73,6 +74,7 @@ def _register_flows(self):
             'quartus:inplace_stream_flatten',
             'quartus:skip_softmax',
             'quartus:fix_softmax_table_size',
+            'quartus:process_fixed_point_quantizer_layer',
             'infer_precision_types',
         ]
         optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)

@@ -265,7 +267,17 @@ def init_conv1d(self, layer):
         n_in, n_out = self.get_layer_mult_size(layer)
         self.set_target_reuse_factor(layer)
         self.set_closest_reuse_factor(layer, n_in, n_out)
-        layer.set_attr('parallelization', layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1))
+
+        # Keep the parallelization factor from the proxy model unless the user explicitly overrides it
+        user_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', None)
+        layer_pf = layer.get_attr('parallelization_factor', None)
+        chosen_pf = user_pf or layer_pf or 1
+        if user_pf is not None and layer_pf is not None:
+            if user_pf != layer_pf:
+                warn(
+                    f'For layer {layer.name}, parallelization factor of {layer_pf} is defined in the proxy-model, but is overridden by the user to {user_pf}.'  # noqa: E501
+                )
+        layer.set_attr('parallelization', chosen_pf)

         # impl_filt_width determines the filter size post-Winograd transformation
         layer.set_attr('impl_filt_width', layer.get_attr('filt_width'))

@@ -295,7 +307,17 @@ def init_conv2d(self, layer):
         n_in, n_out = self.get_layer_mult_size(layer)
         self.set_target_reuse_factor(layer)
         self.set_closest_reuse_factor(layer, n_in, n_out)
-        layer.set_attr('parallelization', layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1))
+
+        # Keep the parallelization factor from the proxy model unless the user explicitly overrides it
+        user_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', None)
+        layer_pf = layer.get_attr('parallelization_factor', None)
+        chosen_pf = user_pf or layer_pf or 1
+        if user_pf is not None and layer_pf is not None:
+            if user_pf != layer_pf:
+                warn(
+                    f'For layer {layer.name}, parallelization factor of {layer_pf} is defined in the proxy-model, but is overridden by the user to {user_pf}.'  # noqa: E501
+                )
+        layer.set_attr('parallelization', chosen_pf)

         # impl_filt_width & impl_filt_height determine the filter size post-Winograd transformation
         layer.set_attr('impl_filt_height', layer.get_attr('filt_height'))
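
The net effect of the two hunks above is a precedence rule: a user-supplied ParallelizationFactor wins, then the proxy-model's embedded parallelization_factor attribute, then the default of 1. A hedged sketch of how a user could still pin the factor in their config (the layer name 'conv1' and the values are hypothetical):

hls_config = {
    'Model': {'Precision': 'ap_fixed<16,6>', 'ReuseFactor': 1},
    'LayerName': {
        # A user-specified factor takes precedence over the proxy-model attribute
        # and triggers the new warning if the two values disagree.
        'conv1': {'ParallelizationFactor': 4},
    },
}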

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 26 additions & 2 deletions
@@ -1,5 +1,6 @@
 import os
 import sys
+from warnings import warn

 import numpy as np

@@ -107,6 +108,7 @@ def _register_flows(self):
             'vivado:inplace_stream_flatten',
             'vivado:skip_softmax',
             'vivado:fix_softmax_table_size',
+            'vivado:process_fixed_point_quantizer_layer',
             'infer_precision_types',
         ]
         optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)

@@ -266,7 +268,17 @@ def init_conv1d(self, layer):
         layer.set_attr('strategy', 'latency')

         out_width = layer.get_output_variable().shape[0]
-        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+
+        # Keep the parallelization factor from the proxy model unless the user explicitly overrides it
+        user_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', None)
+        layer_pf = layer.get_attr('parallelization_factor', None)
+        chosen_pf = user_pf or layer_pf or 1
+        if user_pf is not None and layer_pf is not None:
+            if user_pf != layer_pf:
+                warn(
+                    f'For layer {layer.name}, parallelization factor of {layer_pf} is defined in the proxy-model, but is overridden by the user to {user_pf}.'  # noqa: E501
+                )
+
         valid_pf = self.get_valid_conv_partition_splits(1, out_width)
         if chosen_pf not in valid_pf:
             closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)

@@ -278,6 +290,7 @@
         else:
             closest_pf = chosen_pf
         layer.set_attr('n_partitions', out_width // closest_pf)
+        layer.set_attr('parallelization_factor', closest_pf)

         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

@@ -332,7 +345,17 @@ def init_conv2d(self, layer):

         out_height = layer.get_output_variable().shape[0]
         out_width = layer.get_output_variable().shape[1]
-        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+
+        # Keep the parallelization factor from the proxy model unless the user explicitly overrides it
+        user_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', None)
+        layer_pf = layer.get_attr('parallelization_factor', None)
+        chosen_pf = user_pf or layer_pf or 1
+        if user_pf is not None and layer_pf is not None:
+            if user_pf != layer_pf:
+                warn(
+                    f'For layer {layer.name}, parallelization factor of {layer_pf} is defined in the proxy-model, but is overridden by the user to {user_pf}.'  # noqa: E501
+                )
+
         valid_pf = self.get_valid_conv_partition_splits(out_height, out_width)
         if chosen_pf not in valid_pf:
             closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)

@@ -344,6 +367,7 @@
         else:
             closest_pf = chosen_pf
         layer.set_attr('n_partitions', out_height * out_width // closest_pf)
+        layer.set_attr('parallelization_factor', closest_pf)

         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
from hls4ml.converters.keras_to_hls import keras_handler, parse_default_keras_layer


@keras_handler('FixedPointQuantizer')
def fixedpoint_quantizer_handler(keras_layer, input_names, input_shapes, data_reader):
    config = parse_default_keras_layer(keras_layer, input_names)

    name = config['name']
    fusible = keras_layer['config']['fusible']
    config['RND'] = keras_layer['config']['RND']
    config['SAT'] = keras_layer['config']['SAT']
    config['fusible'] = fusible
    if not fusible:
        k = data_reader.get_weights_data(name, 'keep_negative')
        b = data_reader.get_weights_data(name, 'bits')
        i = data_reader.get_weights_data(name, 'integers')
        config['mask_kbi'] = k, b, i
    config['overrides'] = keras_layer['config']['overrides']

    layer = config
    return layer, input_shapes[0]
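
For context, a rough sketch of the serialized layer dict this handler expects; the field values are hypothetical, the real dict comes from the saved HGQ proxy model:

keras_layer = {
    'class_name': 'FixedPointQuantizer',
    'config': {
        'name': 'fixed_point_quantizer',
        'RND': 'RND',                  # rounding mode applied by the quantizer
        'SAT': 'SAT',                  # saturation mode applied by the quantizer
        'fusible': False,              # if True, the node is later removed and rewired out of the graph
        'overrides': {'layers': {}},   # per-layer config enforced later by EnforceProxyModelEmbeddedConfig
    },
}
# When fusible is False, the handler also reads the 'keep_negative', 'bits' and
# 'integers' weights from the data reader and stores them as config['mask_kbi'].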

hls4ml/model/optimizer/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@
         'qkeras_factorize_alpha',
         'extract_ternary_threshold',
         'fuse_consecutive_batch_normalization',
+        'enforce_proxy_model_embedded_config',
     ],
 )  # TODO Maybe not all QKeras optimizers belong here?

Lines changed: 128 additions & 0 deletions
@@ -0,0 +1,128 @@
import re
from warnings import warn

from hls4ml.backends.fpga.fpga_types import NamedType
from hls4ml.model.layers import Layer, register_layer
from hls4ml.model.optimizer import OptimizerPass, register_pass
from hls4ml.model.types import FixedPrecisionType, WeightVariable

re_purge_prefix = re.compile(r'(?<!\w)(?:ap_|ac_)', re.IGNORECASE)
re_parse_fixed = re.compile(r'\s*(u?)fixed<([^>]+)>\s*', re.IGNORECASE)


class FixedPointQuantizer(Layer):
    def initialize(self):
        inp = self.get_input_variable()
        shape = inp.shape
        dims = inp.dim_names
        self.add_output_variable(shape, dims)
        self.set_attr('n_in', self.get_input_variable().size())
        self.overrides = self.attributes['overrides']
        self.fusible = self.attributes['fusible']
        self.SAT, self.RND = self.attributes['SAT'], self.attributes['RND']
        self.mask_kbi = self.attributes.get('mask_kbi', None)


def to_hls4ml_fixed(fixed: str):
    matched = re_parse_fixed.match(re_purge_prefix.sub('', fixed))
    assert matched is not None, f'Cannot parse {fixed}'
    signed = matched.group(1) != 'u'
    b, i, *args = matched.group(2).split(',')
    b, i = int(b), int(i)
    args = [arg.upper() for arg in args]
    new_type = FixedPrecisionType(b, i, signed, *args)
    # For some reason, __class__ is overwritten in hls4ml
    return new_type
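
A brief sketch (not in the commit) of how to_hls4ml_fixed is expected to behave, following directly from the two regexes above; the example strings are invented:

# Vendor prefixes ('ap_'/'ac_', and 'AP_'/'AC_' on mode names) are stripped, a leading
# 'u' flips signedness, and any remaining mode arguments are upper-cased and passed on.
to_hls4ml_fixed('fixed<10,4>')                        # -> FixedPrecisionType(10, 4, signed=True)
to_hls4ml_fixed('ap_ufixed<8,3,AP_RND_CONV,AP_SAT>')  # -> FixedPrecisionType(8, 3, signed=False, 'RND_CONV', 'SAT')
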
def userconf_ifdef(key: str, layer_name: str, model):
    hls_config: dict = model.config.config['HLSConfig']
    layer_confs: dict = hls_config.get('LayerName', None)
    if not layer_confs:
        return False
    layer_conf = layer_confs.get(layer_name, None)
    if not layer_conf:
        return False
    # return key in layer_conf  # Ideal case. Not for now.
    if key.endswith('_t') and key != 'table_t':
        # table_t cannot be defined in Precision, for some reason.
        # On the other hand, result_t, weight_t, bias_t, accum_t cannot be declared explicitly outside Precision, for now.
        # However, still assume that they can be defined explicitly outside Precision.
        precision_conf = layer_conf.get('Precision', None)
        if not precision_conf:
            return key in layer_conf
        return key[:-2] in precision_conf or key in layer_conf

    if key == 'parallelization_factor':
        # Irregular config key name.
        return 'ParallelizationFactor' in layer_conf

    return key in layer_conf
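
A minimal sketch of the intended check, using a throwaway stand-in for the model object; the layer name 'dense1' and the config values are made up:

from types import SimpleNamespace

cfg = {'HLSConfig': {'LayerName': {'dense1': {'Precision': {'weight': 'ap_fixed<8,3>'}}}}}
model = SimpleNamespace(config=SimpleNamespace(config=cfg))

userconf_ifdef('weight_t', 'dense1', model)  # True: the user pinned the weight precision, so the proxy override is skipped
userconf_ifdef('accum_t', 'dense1', model)   # False: nothing user-defined, so the proxy-model value is applied
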
class EnforceProxyModelEmbeddedConfig(OptimizerPass):
    def match(self, node: Layer):
        if not isinstance(node, FixedPointQuantizer):
            return False
        if not node.overrides:
            return False
        return True

    def transform(self, model, node: FixedPointQuantizer):
        if 'layers' not in node.overrides:
            return False

        graph_changed = False
        layers = node.overrides['layers']
        for name, conf in layers.items():
            conf: dict[str, str]
            name: str
            if name not in model.graph:
                # Some layer may be removed by other passes. (e.g. Final flatten layer)
                continue
            target_node: Layer = model.graph[name]
            for k, v in conf.items():
                if userconf_ifdef(k, name, model):
                    warn(
                        f'Config key {k} is defined in hls_config for layer {name} by user. Proxy model config is ignored.',
                        stacklevel=1,
                    )
                    continue

                if k.endswith('_t'):
                    var_type = target_node.get_attr(k)  # type: ignore
                    if var_type is None:
                        continue
                    var_type: NamedType
                    precision = to_hls4ml_fixed(v)
                    var_type.precision = precision
                    if k == 'result_t':
                        type_name = f'{name}_t'
                    else:
                        type_name = f'{name}_{k}'
                    var_type.name = type_name
                    # Need to overwrite kernel/bias writing precision also, or written weights will likely be wrong.
                    if k[:-2] in target_node.attributes.keys():
                        weight_var: WeightVariable = target_node.attributes[k[:-2]]
                        # weight_var should be a StaticWeightVariable, which is again, defined with meta programming
                        # Type hinting using StaticWeightVariableDefinition which is the base class.
                        weight_var.update_precision(precision)
                        # Well, it turned out that there is yet ANOTHER copy saved in config.
                        model.config.layer_name_precision[f'{name}_{k[:-2]}'] = v
                elif k in target_node.attributes.attributes:
                    target_node.set_attr(k, v)
                elif k == 'parallelization_factor':
                    target_node.set_attr(k, int(v))

            if linear_node := model.graph.get(f'{name}_linear'):
                # Proxy model does not assume any extra linear layer.
                # Purge them on sight
                model.remove_node(linear_node)
                graph_changed = True

        return graph_changed
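
For orientation, a hypothetical 'overrides' payload carried by a FixedPointQuantizer node, matching what the loop above consumes; the layer name and the values are invented:

overrides = {
    'layers': {
        'dense1': {
            'result_t': 'fixed<10,4,RND_CONV,SAT>',  # parsed by to_hls4ml_fixed and renamed to dense1_t
            'weight_t': 'fixed<8,3>',                # also updates the stored weight variable's precision
            'parallelization_factor': '4',           # cast to int and set as a plain layer attribute
        }
    }
}
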
def register_hgq_proxy_model():
    register_layer('FixedPointQuantizer', FixedPointQuantizer)
    register_pass('enforce_proxy_model_embedded_config', EnforceProxyModelEmbeddedConfig)
