From f97506627266903177397f8df28697b7e4733d18 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Thu, 3 Apr 2025 10:17:51 +0200 Subject: [PATCH 01/26] ADD parsing for bidirectional RNN layers --- hls4ml/converters/keras/recurrent.py | 67 ++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/hls4ml/converters/keras/recurrent.py b/hls4ml/converters/keras/recurrent.py index 9f98b33f76..50003fb1b5 100644 --- a/hls4ml/converters/keras/recurrent.py +++ b/hls4ml/converters/keras/recurrent.py @@ -9,6 +9,7 @@ parse_default_keras_layer, parse_keras_model, ) +import numpy as np rnn_layers = ['SimpleRNN', 'LSTM', 'GRU'] @@ -18,6 +19,7 @@ def parse_rnn_layer(keras_layer, input_names, input_shapes, data_reader): assert keras_layer['class_name'] in rnn_layers or keras_layer['class_name'][1:] in rnn_layers layer = parse_default_keras_layer(keras_layer, input_names) + layer['direction'] = 'forward' layer['return_sequences'] = keras_layer['config']['return_sequences'] layer['return_state'] = keras_layer['config']['return_state'] @@ -109,4 +111,69 @@ def parse_time_distributed_layer(keras_layer, input_names, input_shapes, data_re layer['output_shape'] = output_shape[1:] # Remove the batch dimension layer['n_time_steps'] = output_shape[1] + +@keras_handler('Bidirectional') +def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reader): + assert keras_layer['class_name'] == 'Bidirectional' + + rnn_layer = keras_layer['config']['layer'] + assert rnn_layer['class_name'] in rnn_layers or rnn_layer['class_name'][1:] in rnn_layers + + layer = parse_default_keras_layer(rnn_layer, input_names) + layer['name'] = keras_layer['config']['name'] + layer['direction'] = 'bidirectional' + + layer['return_sequences'] = rnn_layer['config']['return_sequences'] + layer['return_state'] = rnn_layer['config']['return_state'] + + if 'SimpleRNN' not in layer['class_name']: + layer['recurrent_activation'] = rnn_layer['config']['recurrent_activation'] + + layer['time_major'] = rnn_layer['config']['time_major'] if 'time_major' in rnn_layer['config'] else False + + # TODO Should we handle time_major? 
+ if layer['time_major']: + raise Exception('Time-major format is not supported by hls4ml') + + layer['n_timesteps'] = input_shapes[0][1] + layer['n_in'] = input_shapes[0][2] + + layer['n_out'] = 2*rnn_layer['config']['units'] + + + if 'SimpleRNN' in layer['class_name']: + cell_name = 'simple_rnn' + else: + cell_name = layer['class_name'].lower() + weight_data_f, recurrent_weight_data_f, bias_data_f = get_weights_data( + data_reader, layer['name'], [f'forward_{cell_name}/{cell_name}_cell/kernel', + f'forward_{cell_name}/{cell_name}_cell/recurrent_kernel', + f'forward_{cell_name}/{cell_name}_cell/bias'] + ) + weight_data_b, recurrent_weight_data_b, bias_data_b = get_weights_data( + data_reader, layer['name'], [f'backward_{cell_name}/{cell_name}_cell/kernel', + f'backward_{cell_name}/{cell_name}_cell/recurrent_kernel', + f'backward_{cell_name}/{cell_name}_cell/bias'] + ) + layer['weight_data'] = np.stack((weight_data_f, weight_data_b), axis=0) + layer['recurrent_weight_data'] = np.stack((recurrent_weight_data_f, recurrent_weight_data_b), axis=0) + layer['bias_data'] = np.stack((bias_data_f, bias_data_b), axis=0) + + if 'GRU' in layer['class_name']: + layer['apply_reset_gate'] = 'after' if rnn_layer['config']['reset_after'] else 'before' + + # biases array is actually a 2-dim array of arrays (bias + recurrent bias) + # both arrays have shape: n_units * 3 (z, r, h_cand) + biases = layer['bias_data'] + layer['bias_data'] = biases[0] + layer['recurrent_bias_data'] = biases[1] + + if layer['return_sequences']: + output_shape = [input_shapes[0][0], layer['n_timesteps'], layer['n_out']] + else: + output_shape = [input_shapes[0][0], layer['n_out']] + + if layer['return_state']: + raise Exception('"return_state" of {} layer is not yet supported.') + return layer, output_shape From 8121281d17166db3657c2e1469f0ccac0d9f8e97 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Wed, 16 Apr 2025 18:19:59 +0200 Subject: [PATCH 02/26] Implement bidirectional rnn layers --- .../vivado/passes/recurrent_templates.py | 183 +++++++++++++++- hls4ml/converters/keras/recurrent.py | 32 +-- hls4ml/model/layers.py | 70 +++++- .../vivado/nnet_utils/nnet_recurrent.h | 200 ++++++++++++++++++ 4 files changed, 467 insertions(+), 18 deletions(-) diff --git a/hls4ml/backends/vivado/passes/recurrent_templates.py b/hls4ml/backends/vivado/passes/recurrent_templates.py index 6934e82e4e..ea81d294c0 100644 --- a/hls4ml/backends/vivado/passes/recurrent_templates.py +++ b/hls4ml/backends/vivado/passes/recurrent_templates.py @@ -1,6 +1,6 @@ from hls4ml.backends.backend import get_backend from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate -from hls4ml.model.layers import GRU, LSTM, TimeDistributed +from hls4ml.model.layers import GRU, LSTM, BLSTM, BGRU, TimeDistributed # recurrent multiplication template @@ -86,10 +86,44 @@ static const bool pytorch_order = {pytorch}; }};\n""" +bidir_recr_config_template = """struct config{index} : nnet::{recr_type}_config {{ + typedef {accum_t.name} accum_t; + typedef {weight_t.name} weight_t; // Matrix + typedef {recurrent_weight_t.name} recurrent_weight_t; // Matrix + typedef {bias_t.name} bias_t; // Vector + typedef {recurrent_bias_t.name} recurrent_bias_t; // Vector + typedef {weight_b_t.name} weight_b_t; // Matrix + typedef {recurrent_weight_b_t.name} recurrent_weight_b_t; // Matrix + typedef {bias_b_t.name} bias_b_t; // Vector + typedef {recurrent_bias_b_t.name} recurrent_bias_b_t; // Vector + typedef {config_mult_t1} mult_config1; + typedef {config_mult_t2} mult_config2; 
+ typedef {recr_act_t} ACT_CONFIG_{RECR_TYPE}; + template + using activation_recr = nnet::activation::{recurrent_activation}; + typedef {act_t} ACT_CONFIG_T; + template + using activation = nnet::activation::{activation}; + static const unsigned n_in = {n_in}; + static const unsigned n_out = {n_out}; + static const unsigned n_state = {n_state}; + static const unsigned n_sequence = {n_sequence}; + static const unsigned n_sequence_out = {n_sequence_out}; + static const unsigned io_type = nnet::{strategy}; + static const unsigned reuse_factor = {reuse}; + static const bool store_weights_in_bram = false; + static const bool use_static = {static}; + static const bool pytorch_order = {pytorch}; +}};\n""" + recr_function_template = 'nnet::{recr_type}_stack<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br});' recr_function_template_initial_states_lstm = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {input3_t}, {output_t}, {config}>({input}, {input2}, {input3}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 recr_function_template_initial_states_gru = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 +recr_bidir_function_template = 'nnet::{recr_type}_stack<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br}, {w_b}, {wr_b}, {b_b}, {br_b});' +recr_bidir_function_template_initial_states_lstm = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {input3_t}, {output_t}, {config}>({input}, {input2}, {input3}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 +recr_bidir_function_template_initial_states_gru = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 + recr_include_list = ['nnet_utils/nnet_recurrent.h'] @@ -206,6 +240,118 @@ def format(self, node): return mult_config1 + '\n' + mult_config2 + '\n' + recr_act_config + '\n' + act_config + '\n' + recr_config +class BidirectionalRecurrentConfigTemplate(LayerConfigTemplate): + def __init__(self): + super().__init__((BLSTM, BGRU)) + self.template = bidir_recr_config_template + self.act_template = activ_config_template + self.recr_act_template = recr_activ_config_template + self.mult1_template = recr_mult_config_template_1 + self.mult2_template = recr_mult_config_template_2 + + def format(self, node): + params = self._default_config_params(node) + + params['n_in'] = node.get_input_variable().dim_names[1] + params['n_sequence'] = node.get_input_variable().dim_names[0] + if node.get_attr('return_sequences'): + params['n_sequence_out'] = node.get_output_variable().dim_names[0] + params['n_state'] = node.get_output_variable().dim_names[1] + params['n_out'] = node.get_output_variable().dim_names[1] + else: + params['n_sequence_out'] = 1 + params['n_state'] = node.get_output_variable().dim_names[0] + params['n_out'] = node.get_output_variable().dim_names[0] + params['config_mult_t1'] = f'config{node.index}_1' + params['config_mult_t2'] = f'config{node.index}_2' + params['recr_act_t'] = '{}_config{}_recr'.format(node.get_attr('recurrent_activation'), node.index) + params['act_t'] = '{}_config{}'.format(node.get_attr('activation'), node.index) + params['strategy'] = node.get_attr('strategy') + params['static'] = 'true' if node.attributes['static'] else 'false' + params['pytorch'] = 'true' if node.get_attr('pytorch', False) else 'false' + params['recr_type'] = node.class_name.lower() + params['RECR_TYPE'] = node.class_name[1:] + + if node.class_name == 
'BLSTM': + n_recr_mult = 4 + else: # BGRU + n_recr_mult = 3 + + recr_config = self.template.format(**params) + + act_params = self._default_config_params(node) + recr_act_params = self._default_config_params(node) + + act_params['type'] = node.get_attr('activation') + recr_act_params['type'] = node.get_attr('recurrent_activation') + if node.get_attr('return_sequences'): + act_params['n_in'] = node.get_output_variable().shape[1] + recr_act_params['n_in'] = node.get_output_variable().shape[1] * (n_recr_mult - 1) + else: + act_params['n_in'] = node.get_output_variable().shape[0] + recr_act_params['n_in'] = node.get_output_variable().shape[0] * (n_recr_mult - 1) + + act_config = self.act_template.format(**act_params) + recr_act_config = self.recr_act_template.format(**recr_act_params) + + mult_params1 = self._default_config_params(node) + mult_params2 = self._default_config_params(node) + + mult_params1['n_in'] = node.get_input_variable().shape[1] + if node.get_attr('return_sequences'): + mult_params1['n_out'] = node.get_output_variable().shape[1] / 2 * n_recr_mult + else: + mult_params1['n_out'] = node.get_output_variable().shape[0] / 2 * n_recr_mult + mult_params1['product_type'] = get_backend('vivado').product_type( + node.get_input_variable().type.precision, node.get_weights('weight').type.precision + ) + mult_params1['reuse'] = params['reuse'] + mult_params1['index'] = str(node.index) + '_1' + mult_params1['nzeros'] = node.get_weights('weight').nzeros + mult_params1['nonzeros'] = node.get_weights('weight').nonzeros + + namespace = params['namespace'] + + if node.get_attr('strategy').lower() == 'latency': + mult_params1['dense_function'] = 'nnet::DenseLatency' + elif node.get_attr('strategy').lower() == 'resource': + if int(mult_params1['reuse_factor']) <= int(mult_params1['n_in']): + mult_params1['dense_function'] = 'nnet::DenseResource_rf_leq_nin' + else: + mult_params1['dense_function'] = 'nnet::DenseResource_rf_gt_nin_rem0' + # The 3rd case is never used + elif node.get_attr('strategy').lower() == 'resource_unrolled': + mult_params1['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}_1' + + if node.get_attr('return_sequences'): + mult_params2['n_in'] = node.get_output_variable().shape[1] / 2 + mult_params2['n_out'] = node.get_output_variable().shape[1] / 2 * n_recr_mult + else: + mult_params2['n_in'] = node.get_output_variable().shape[0] / 2 + mult_params2['n_out'] = node.get_output_variable().shape[0] / 2 * n_recr_mult + mult_params2['product_type'] = get_backend('vivado').product_type( + node.get_input_variable().type.precision, node.get_weights('recurrent_weight').type.precision + ) + mult_params2['reuse'] = node.attributes['recurrent_reuse_factor'] + mult_params2['index'] = str(node.index) + '_2' + mult_params2['nzeros'] = node.get_weights('recurrent_weight').nzeros + mult_params2['nonzeros'] = node.get_weights('recurrent_weight').nonzeros + + if node.get_attr('strategy').lower() == 'latency': + mult_params2['dense_function'] = 'nnet::DenseLatency' + elif node.get_attr('strategy').lower() == 'resource': + if int(mult_params2['reuse_factor']) <= int(mult_params2['n_in']): + mult_params2['dense_function'] = 'nnet::DenseResource_rf_leq_nin' + else: + mult_params2['dense_function'] = 'nnet::DenseResource_rf_gt_nin_rem0' + # The 3rd case is never used + elif node.get_attr('strategy').lower() == 'resource_unrolled': + mult_params2['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}_2' + + mult_config1 = self.mult1_template.format(**mult_params1) + 
mult_config2 = self.mult2_template.format(**mult_params2) + + return mult_config1 + '\n' + mult_config2 + '\n' + recr_act_config + '\n' + act_config + '\n' + recr_config class RecurrentFunctionTemplate(FunctionCallTemplate): def __init__(self): @@ -303,3 +449,38 @@ def format(self, node): return self.template_start.format(**params) else: return self.template_end.format(**params) + +class BidirectionalRecurrentFunctionTemplate(FunctionCallTemplate): + def __init__(self): + super().__init__((BLSTM, BGRU), include_header=recr_include_list) + + def format(self, node): + params = self._default_function_params(node) + if params['pass_initial_states'] == 'true': + params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name + params['input2'] = node.get_input_variable(node.inputs[1]).name + if node.class_name == 'BLSTM': + params['input3'] = node.get_input_variable(node.inputs[2]).name + params['input3_t'] = node.get_input_variable(node.inputs[2]).type.name + + params['w'] = node.get_weights('weight').name + params['b'] = node.get_weights('bias').name + params['wr'] = node.get_weights('recurrent_weight').name + params['br'] = node.get_weights('recurrent_bias').name + params['w_b'] = node.get_weights('weight_b').name + params['b_b'] = node.get_weights('bias_b').name + params['wr_b'] = node.get_weights('recurrent_weight_b').name + params['br_b'] = node.get_weights('recurrent_bias_b').name + params['activation'] = node.get_attr('activation') + params['recurrent_activation'] = node.get_attr('recurrent_activation') + params['recr_type'] = node.class_name.lower() + + if params['pass_initial_states'] == 'true': + if node.class_name == 'BLSTM': + template = recr_bidir_function_template_initial_states_lstm + else: + template = recr_bidir_function_template_initial_states_gru + else: + template = recr_bidir_function_template + + return template.format(**params) diff --git a/hls4ml/converters/keras/recurrent.py b/hls4ml/converters/keras/recurrent.py index 50003fb1b5..cf3a3e8707 100644 --- a/hls4ml/converters/keras/recurrent.py +++ b/hls4ml/converters/keras/recurrent.py @@ -12,6 +12,7 @@ import numpy as np rnn_layers = ['SimpleRNN', 'LSTM', 'GRU'] +merge_modes = ['sum', 'mul', 'concat', 'ave'] @keras_handler(*rnn_layers) @@ -121,6 +122,7 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade layer = parse_default_keras_layer(rnn_layer, input_names) layer['name'] = keras_layer['config']['name'] + layer['class_name'] = 'B' + layer['class_name'] layer['direction'] = 'bidirectional' layer['return_sequences'] = rnn_layer['config']['return_sequences'] @@ -138,26 +140,27 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade layer['n_timesteps'] = input_shapes[0][1] layer['n_in'] = input_shapes[0][2] - layer['n_out'] = 2*rnn_layer['config']['units'] + assert keras_layer['config']['merge_mode'] in merge_modes + layer['merge_mode'] = keras_layer['config']['merge_mode'] + layer['n_out'] = rnn_layer['config']['units'] + if keras_layer['config']['merge_mode'] == 'concat': + layer['n_out'] *= 2 if 'SimpleRNN' in layer['class_name']: cell_name = 'simple_rnn' else: - cell_name = layer['class_name'].lower() - weight_data_f, recurrent_weight_data_f, bias_data_f = get_weights_data( - data_reader, layer['name'], [f'forward_{cell_name}/{cell_name}_cell/kernel', - f'forward_{cell_name}/{cell_name}_cell/recurrent_kernel', - f'forward_{cell_name}/{cell_name}_cell/bias'] + cell_name = rnn_layer['class_name'].lower() + layer['weight_data'], 
layer['recurrent_weight_data'], layer['bias_data'] = get_weights_data( + data_reader, layer['name'], [f'{cell_name}_cell/kernel', + f'{cell_name}_cell/recurrent_kernel', + f'{cell_name}_cell/bias'] ) - weight_data_b, recurrent_weight_data_b, bias_data_b = get_weights_data( - data_reader, layer['name'], [f'backward_{cell_name}/{cell_name}_cell/kernel', - f'backward_{cell_name}/{cell_name}_cell/recurrent_kernel', - f'backward_{cell_name}/{cell_name}_cell/bias'] + layer['weight_b_data'], layer['recurrent_weight_b_data'], layer['bias_b_data'] = get_weights_data( + data_reader, layer['name'], [f'{cell_name}_cell/kernel', + f'{cell_name}_cell/recurrent_kernel', + f'{cell_name}_cell/bias'] ) - layer['weight_data'] = np.stack((weight_data_f, weight_data_b), axis=0) - layer['recurrent_weight_data'] = np.stack((recurrent_weight_data_f, recurrent_weight_data_b), axis=0) - layer['bias_data'] = np.stack((bias_data_f, bias_data_b), axis=0) if 'GRU' in layer['class_name']: layer['apply_reset_gate'] = 'after' if rnn_layer['config']['reset_after'] else 'before' @@ -165,8 +168,11 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade # biases array is actually a 2-dim array of arrays (bias + recurrent bias) # both arrays have shape: n_units * 3 (z, r, h_cand) biases = layer['bias_data'] + biases_b = layer['bias_b_data'] layer['bias_data'] = biases[0] layer['recurrent_bias_data'] = biases[1] + layer['bias_b_data'] = biases_b[0] + layer['recurrent_bias_b_data'] = biases_b[1] if layer['return_sequences']: output_shape = [input_shapes[0][0], layer['n_timesteps'], layer['n_out']] diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index c22be3a2ca..280eab6ed4 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1353,7 +1353,7 @@ class LSTM(Layer): Attribute('return_sequences', value_type=bool, default=False), Attribute('return_state', value_type=bool, default=False), Attribute('pass_initial_states', value_type=bool, default=False), - ChoiceAttribute('direction', ['forward', 'backward'], default='forward'), + ChoiceAttribute('direction', ['forward', 'backward', 'bidirectional'], configurable=False, default='forward'), Attribute('time_major', value_type=bool, default=False), WeightAttribute('weight'), WeightAttribute('bias'), @@ -1384,7 +1384,7 @@ def initialize(self): self.add_output_variable( state_shape, state_dims, out_name=self.outputs[2], var_name='layer{index}_c', type_name='layer{index}_c_t' ) - + # weights self.add_weights() @@ -1402,6 +1402,39 @@ def initialize(self): self.add_weights_variable(name='recurrent_bias', var_name='br{index}', data=recurrent_bias) +class BLSTM(LSTM): + _expected_attributes = [ + WeightAttribute('weight_b'), + WeightAttribute('bias_b'), + WeightAttribute('recurrent_weight_b'), + WeightAttribute('recurrent_bias_b'), + TypeAttribute('weight_b'), + TypeAttribute('bias_b'), + TypeAttribute('recurrent_weight_b'), + TypeAttribute('recurrent_bias_b'), + ChoiceAttribute('merge_mode', ['sum', 'mul', 'concat', 'ave'], configurable=False, default='concat'), + ] + def initialize(self): + super().initialize() + + #Add backward layer parameters + # weights + self.add_weights_variable(name='weight_b', var_name='w_b{index}') + + # recurrent weights + self.add_weights_variable(name='recurrent_weight_b', var_name='wr_b{index}') + + # biases + self.add_weights_variable(name='bias_b', var_name='b_b{index}') + + if "pytorch" in self.attributes.keys(): + self.add_weights_variable(name='recurrent_bias_b', var_name='br_b{index}') + else: + 
recurrent_weight_b = self.get_attr('recurrent_weight_b_data') + recurrent_bias_b = np.zeros(recurrent_weight_b.shape[1]) + self.add_weights_variable(name='recurrent_bias_b', var_name='br_b{index}', data=recurrent_bias_b) + + class GRU(Layer): _expected_attributes = [ Attribute('n_out'), @@ -1410,9 +1443,9 @@ class GRU(Layer): Attribute('return_sequences', value_type=bool, default=False), Attribute('return_state', value_type=bool, default=False), Attribute('pass_initial_states', value_type=bool, default=False), - ChoiceAttribute('direction', ['forward', 'backward'], default='forward'), + ChoiceAttribute('direction', ['forward', 'backward', 'bidirectional'], configurable=False, default='forward'), Attribute('time_major', value_type=bool, default=False), - ChoiceAttribute('apply_reset_gate', ['before', 'after'], default='after'), + ChoiceAttribute('apply_reset_gate', ['before', 'after'], configurable=False, default='after'), WeightAttribute('weight'), WeightAttribute('bias'), WeightAttribute('recurrent_weight'), @@ -1475,6 +1508,33 @@ def initialize(self): self.add_output_variable(shape, dims) + +class BGRU(GRU): + _expected_attributes = [ + WeightAttribute('weight_b'), + WeightAttribute('bias_b'), + WeightAttribute('recurrent_weight_b'), + WeightAttribute('recurrent_bias_b'), + TypeAttribute('weight_b'), + TypeAttribute('bias_b'), + TypeAttribute('recurrent_weight_b'), + TypeAttribute('recurrent_bias_b'), + ChoiceAttribute('merge_mode', ['sum', 'mul', 'concat', 'ave'], configurable=False, default='concat'), + ] + def initialize(self): + super().initialize() + + #Add backward layer parameters + # weights + self.add_weights_variable(name='weight_b', var_name='w_b{index}') + + # recurrent weights + self.add_weights_variable(name='recurrent_weight_b', var_name='wr_b{index}') + + # biases + self.add_weights_variable(name='bias_b', var_name='b_b{index}') + self.add_weights_variable(name='recurrent_bias_b', var_name='br_b{index}') + class GarNet(Layer): ref_impl = False @@ -1764,6 +1824,8 @@ def initialize(self): 'SimpleRNN': SimpleRNN, 'LSTM': LSTM, 'GRU': GRU, + 'BLSTM': BLSTM, + 'BGRU': BGRU, 'QSimpleRNN': SimpleRNN, 'QLSTM': LSTM, 'QGRU': GRU, diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h index 618767dcb5..2fd4af4b29 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h @@ -35,6 +35,14 @@ struct lstm_config { template using activation_recr = nnet::activation::relu; template using activation = nnet::activation::relu; }; + +struct blstm_config : lstm_config{ + // Internal data type definitions + typedef float weight_b_t; + typedef float recurrent_weight_b_t; + typedef float bias_b_t; + typedef float recurrent_bias_b_t; +}; // Long Short term Memory NN (LSTM) // Resources: // https://github.com/nicodjimenez/lstm/blob/master/lstm.py @@ -234,6 +242,76 @@ void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CO } } + +template +void blstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], + typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + 
typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { + + res_T h_newstate[CONFIG_T::n_state]; + res_T s_newstate[CONFIG_T::n_state]; + data_T data_in[CONFIG_T::n_in]; + res_T h_newstate_back[CONFIG_T::n_state]; + res_T s_newstate_back[CONFIG_T::n_state]; + data_T data_in_back[CONFIG_T::n_in]; + bool reset_state = true; + + #pragma HLS ARRAY_PARTITION variable=h_newstate complete + #pragma HLS ARRAY_PARTITION variable=s_newstate complete + #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete + #pragma HLS ARRAY_PARTITION variable=s_newstate_back complete + + for (int ii = 0; ii < CONFIG_T::n_state; ii++) { + #pragma HLS UNROLL + h_newstate[ii] = 0; + s_newstate[ii] = 0; + h_newstate_back[ii] = 0; + s_newstate_back[ii] = 0; + + } + for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { + for (int j = 0; j < CONFIG_T::n_in; j++) { + #pragma HLS UNROLL + data_in[j] = data[j + iloop * CONFIG_T::n_in]; + data_in_back[j] = data[CONFIG_T::n_in -1 -j + iloop * CONFIG_T::n_in]; + //printf("%u", j + iloop * CONFIG_T::n_in); + //printf("%u", CONFIG_T::n_in -1 -j + iloop * CONFIG_T::n_in); + } + if (CONFIG_T::use_static) { + nnet::lstm_static(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, + param_br); + nnet::lstm_static(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, param_r_back, param_b_back, + param_br_back); + } + else { + nnet::lstm(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, + param_br); + nnet::lstm(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, param_r_back, param_b_back, + param_br_back); + } + if (CONFIG_T::n_sequence_out > 1) + for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { + #pragma HLS UNROLL + res[i] = h_newstate[j]; + res[i+CONFIG_T::n_state] = h_newstate_back[j]; + } + reset_state = false; + } + if (CONFIG_T::n_sequence_out == 1) + for (int i = 0; i < (CONFIG_T::n_state); i++) { + #pragma HLS UNROLL + res[i] = h_newstate[i]; + res[i+CONFIG_T::n_state] = h_newstate_back[i]; + } +} + + template void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newstate[CONFIG_T::n_state], s_T s_newstate[CONFIG_T::n_state], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_state], @@ -269,6 +347,54 @@ void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newsta } } +template +void blstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newstate[CONFIG_T::n_state], + s_T s_newstate[CONFIG_T::n_state], h_T h_newstate_back[CONFIG_T::n_state], + s_T s_newstate_back[CONFIG_T::n_state], res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], + typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { + + data_T 
data_in[CONFIG_T::n_in]; + data_T data_in_back[CONFIG_T::n_in]; + bool reset_state = true; + + #pragma HLS ARRAY_PARTITION variable=h_newstate complete + #pragma HLS ARRAY_PARTITION variable=s_newstate complete + #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete + #pragma HLS ARRAY_PARTITION variable=s_newstate_back complete + + for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { + for (int j = 0; j < CONFIG_T::n_in; j++) { + #pragma HLS UNROLL + data_in[j] = data[j + iloop * CONFIG_T::n_in]; + data_in_back[j] = data[CONFIG_T::n_in -1 -j + iloop * CONFIG_T::n_in]; + } + nnet::lstm(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, + param_br); + nnet::lstm(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, param_r_back, param_b_back, + param_br_back); + if (CONFIG_T::n_sequence_out > 1) + for (int i = CONFIG_T::n_state *2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { + #pragma HLS UNROLL + res[i] = h_newstate[j]; + res[i+CONFIG_T::n_state] = h_newstate_back[j]; + } + reset_state = false; + } + if (CONFIG_T::n_sequence_out == 1) + for (int i = 0; i < (CONFIG_T::n_state); i++) { + #pragma HLS UNROLL + res[i] = h_newstate[i]; + res[i+CONFIG_T::n_state] = h_newstate_back[i]; + } +} + template void lstm_stack(hls::stream &data_stream, hls::stream &res_stream, typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], @@ -332,6 +458,80 @@ void lstm_stack(hls::stream &data_stream, hls::stream &res_stream } } +template +void blstm_stack(hls::stream &data_stream, hls::stream &res_stream, + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], + typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { + + typename res_T::value_type h_newstate[CONFIG_T::n_state]; + typename res_T::value_type s_newstate[CONFIG_T::n_state]; + typename res_T::value_type h_newstate_back[CONFIG_T::n_state]; + typename res_T::value_type s_newstate_back[CONFIG_T::n_state]; + #pragma HLS ARRAY_PARTITION variable=h_newstate complete + #pragma HLS ARRAY_PARTITION variable=s_newstate complete + #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete + #pragma HLS ARRAY_PARTITION variable=s_newstate_back complete + + for (int ii = 0; ii < CONFIG_T::n_state; ii++) { + #pragma HLS UNROLL + h_newstate[ii] = 0; + s_newstate[ii] = 0; + h_newstate_back[ii] = 0; + s_newstate_back[ii] = 0; + } + + typename data_T::value_type data_in[CONFIG_T::n_in]; + typename data_T::value_type data_in_back[CONFIG_T::n_in]; + bool reset_state = true; + +DataPropagation: + for (int i_in = 0; i_in < CONFIG_T::n_sequence * CONFIG_T::n_in / data_T::size; i_in++) { + if (CONFIG_T::n_sequence * CONFIG_T::n_in / data_T::size > 1) { + // #pragma HLS PIPELINE + } + data_T data_pack = data_stream.read(); + DataPack: + for (int i_pack = 0; i_pack < data_T::size; i_pack++) { + #pragma HLS UNROLL + data_in[i_pack] = data_pack[i_pack]; + } + if (CONFIG_T::use_static) + nnet::lstm_static( + reset_state, data_in, h_newstate, s_newstate, param, param_r, 
param_b, param_br); + else + nnet::lstm( + reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, param_br); + if (CONFIG_T::n_sequence_out > 1) { + res_T res_pack; + PRAGMA_DATA_PACK(res_pack) + ResPack_sequences: + for (int i_pack = 0; i_pack < res_T::size; i_pack++) { + #pragma HLS UNROLL + res_pack[i_pack] = h_newstate[i_pack]; + } + res_stream.write(res_pack); + } + reset_state = false; + } + + if (CONFIG_T::n_sequence_out == 1) { + res_T res_pack; + PRAGMA_DATA_PACK(res_pack) + ResPack: + for (int i_pack = 0; i_pack < res_T::size; i_pack++) { + #pragma HLS UNROLL + res_pack[i_pack] = h_newstate[i_pack]; + } + res_stream.write(res_pack); + } +} + // Struct for the GRU template struct gru_config { From fc1e950692fe61cf50172f3dc38d89ed8c931bfa Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 25 Apr 2025 14:46:33 +0200 Subject: [PATCH 03/26] ADD fixes --- .../vivado/passes/recurrent_templates.py | 24 ++++++++++++------- hls4ml/converters/keras_v2_to_hls.py | 2 +- hls4ml/model/layers.py | 16 +++++++------ 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/hls4ml/backends/vivado/passes/recurrent_templates.py b/hls4ml/backends/vivado/passes/recurrent_templates.py index ea81d294c0..84fb476483 100644 --- a/hls4ml/backends/vivado/passes/recurrent_templates.py +++ b/hls4ml/backends/vivado/passes/recurrent_templates.py @@ -1,6 +1,10 @@ from hls4ml.backends.backend import get_backend from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate +<<<<<<< HEAD from hls4ml.model.layers import GRU, LSTM, BLSTM, BGRU, TimeDistributed +======= +from hls4ml.model.layers import BGRU, BLSTM, GRU, LSTM +>>>>>>> d2d3b452 (ADD fixes) # recurrent multiplication template @@ -120,7 +124,7 @@ recr_function_template_initial_states_lstm = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {input3_t}, {output_t}, {config}>({input}, {input2}, {input3}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 recr_function_template_initial_states_gru = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 -recr_bidir_function_template = 'nnet::{recr_type}_stack<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br}, {w_b}, {wr_b}, {b_b}, {br_b});' +recr_bidir_function_template = 'nnet::{recr_type}_stack<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br}, {w_b}, {wr_b}, {b_b}, {br_b});' # noqa: E501 recr_bidir_function_template_initial_states_lstm = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {input3_t}, {output_t}, {config}>({input}, {input2}, {input3}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 recr_bidir_function_template_initial_states_gru = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 @@ -240,6 +244,7 @@ def format(self, node): return mult_config1 + '\n' + mult_config2 + '\n' + recr_act_config + '\n' + act_config + '\n' + recr_config + class BidirectionalRecurrentConfigTemplate(LayerConfigTemplate): def __init__(self): super().__init__((BLSTM, BGRU)) @@ -256,11 +261,11 @@ def format(self, node): params['n_sequence'] = node.get_input_variable().dim_names[0] if node.get_attr('return_sequences'): params['n_sequence_out'] = node.get_output_variable().dim_names[0] - params['n_state'] = node.get_output_variable().dim_names[1] + params['n_state'] = f'{node.get_output_variable().dim_names[1]} / 2' params['n_out'] = node.get_output_variable().dim_names[1] else: 
params['n_sequence_out'] = 1 - params['n_state'] = node.get_output_variable().dim_names[0] + params['n_state'] = f'{node.get_output_variable().dim_names[0]} / 2' params['n_out'] = node.get_output_variable().dim_names[0] params['config_mult_t1'] = f'config{node.index}_1' params['config_mult_t2'] = f'config{node.index}_2' @@ -299,9 +304,9 @@ def format(self, node): mult_params1['n_in'] = node.get_input_variable().shape[1] if node.get_attr('return_sequences'): - mult_params1['n_out'] = node.get_output_variable().shape[1] / 2 * n_recr_mult + mult_params1['n_out'] = node.get_output_variable().shape[1] // 2 * n_recr_mult else: - mult_params1['n_out'] = node.get_output_variable().shape[0] / 2 * n_recr_mult + mult_params1['n_out'] = node.get_output_variable().shape[0] // 2 * n_recr_mult mult_params1['product_type'] = get_backend('vivado').product_type( node.get_input_variable().type.precision, node.get_weights('weight').type.precision ) @@ -324,11 +329,11 @@ def format(self, node): mult_params1['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}_1' if node.get_attr('return_sequences'): - mult_params2['n_in'] = node.get_output_variable().shape[1] / 2 - mult_params2['n_out'] = node.get_output_variable().shape[1] / 2 * n_recr_mult + mult_params2['n_in'] = node.get_output_variable().shape[1] // 2 + mult_params2['n_out'] = node.get_output_variable().shape[1] // 2 * n_recr_mult else: - mult_params2['n_in'] = node.get_output_variable().shape[0] / 2 - mult_params2['n_out'] = node.get_output_variable().shape[0] / 2 * n_recr_mult + mult_params2['n_in'] = node.get_output_variable().shape[0] // 2 + mult_params2['n_out'] = node.get_output_variable().shape[0] // 2 * n_recr_mult mult_params2['product_type'] = get_backend('vivado').product_type( node.get_input_variable().type.precision, node.get_weights('recurrent_weight').type.precision ) @@ -353,6 +358,7 @@ def format(self, node): return mult_config1 + '\n' + mult_config2 + '\n' + recr_act_config + '\n' + act_config + '\n' + recr_config + class RecurrentFunctionTemplate(FunctionCallTemplate): def __init__(self): super().__init__((LSTM, GRU), include_header=recr_include_list) diff --git a/hls4ml/converters/keras_v2_to_hls.py b/hls4ml/converters/keras_v2_to_hls.py index 08962b96d6..f5d3cd1149 100644 --- a/hls4ml/converters/keras_v2_to_hls.py +++ b/hls4ml/converters/keras_v2_to_hls.py @@ -241,7 +241,7 @@ def parse_keras_model(model_arch, reader): 'HGQ>UnaryLUT', ] # Recurrent layers - recurrent_layers = ['SimpleRNN', 'LSTM', 'GRU', 'QSimpleRNN', 'QLSTM', 'QGRU'] + recurrent_layers = ['SimpleRNN', 'LSTM', 'GRU', 'QSimpleRNN', 'QLSTM', 'QGRU', 'BLSTM', 'BGRU'] # All supported layers supported_layers = get_supported_keras_layers() + skip_layers diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 280eab6ed4..fb151f4c36 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1384,7 +1384,7 @@ def initialize(self): self.add_output_variable( state_shape, state_dims, out_name=self.outputs[2], var_name='layer{index}_c', type_name='layer{index}_c_t' ) - + # weights self.add_weights() @@ -1403,7 +1403,7 @@ def initialize(self): class BLSTM(LSTM): - _expected_attributes = [ + _expected_attributes = LSTM._expected_attributes + [ WeightAttribute('weight_b'), WeightAttribute('bias_b'), WeightAttribute('recurrent_weight_b'), @@ -1414,10 +1414,11 @@ class BLSTM(LSTM): TypeAttribute('recurrent_bias_b'), ChoiceAttribute('merge_mode', ['sum', 'mul', 'concat', 'ave'], configurable=False, default='concat'), ] + def initialize(self): 
super().initialize() - - #Add backward layer parameters + + # Add backward layer parameters # weights self.add_weights_variable(name='weight_b', var_name='w_b{index}') @@ -1510,7 +1511,7 @@ def initialize(self): class BGRU(GRU): - _expected_attributes = [ + _expected_attributes = GRU._expected_attributes + [ WeightAttribute('weight_b'), WeightAttribute('bias_b'), WeightAttribute('recurrent_weight_b'), @@ -1521,10 +1522,11 @@ class BGRU(GRU): TypeAttribute('recurrent_bias_b'), ChoiceAttribute('merge_mode', ['sum', 'mul', 'concat', 'ave'], configurable=False, default='concat'), ] + def initialize(self): super().initialize() - - #Add backward layer parameters + + # Add backward layer parameters # weights self.add_weights_variable(name='weight_b', var_name='w_b{index}') From f47bb5a49bb834020ae3e4083a8423467974c213 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 16 May 2025 10:08:47 +0200 Subject: [PATCH 04/26] FIX resource strategy --- hls4ml/backends/fpga/fpga_backend.py | 18 +++++++++++++++ .../vivado/passes/resource_strategy.py | 22 ++++++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index 4896c25f9f..2bdc90baa3 100644 --- a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -14,6 +14,8 @@ Activation, BatchNormalization, BatchNormOnnx, + BidirectionalGRU, + BidirectionalLSTM, Conv, Conv1D, Conv2D, @@ -68,6 +70,8 @@ def __init__(self, name): SimpleRNN, LSTM, GRU, + BidirectionalLSTM, + BidirectionalGRU, Dot, Conv, MatMul, @@ -213,6 +217,13 @@ def get_layer_mult_size(self, layer): n_out = layer.get_attr('n_filt') return n_in, n_out + if 'BidirectionalLSTM' in layer.class_name: + n_in = layer.get_attr('n_in') + n_out = layer.get_attr('n_out') * 2 # /2*4 + n_in_recr = layer.get_attr('n_out') // 2 + n_out_recr = n_out + return n_in, n_out, n_in_recr, n_out_recr + if 'LSTM' in layer.class_name: n_in = layer.get_attr('n_in') n_out = layer.get_attr('n_out') * 4 @@ -220,6 +231,13 @@ def get_layer_mult_size(self, layer): n_out_recr = n_out return n_in, n_out, n_in_recr, n_out_recr + if 'BidirectionalGRU' in layer.class_name: + n_in = layer.get_attr('n_in') + n_out = layer.get_attr('n_out') // 2 * 3 + n_in_recr = layer.get_attr('n_out') // 2 + n_out_recr = n_out + return n_in, n_out, n_in_recr, n_out_recr + if 'GRU' in layer.class_name: n_in = layer.get_attr('n_in') n_out = layer.get_attr('n_out') * 3 diff --git a/hls4ml/backends/vivado/passes/resource_strategy.py b/hls4ml/backends/vivado/passes/resource_strategy.py index 0c06190f30..bbd030c786 100644 --- a/hls4ml/backends/vivado/passes/resource_strategy.py +++ b/hls4ml/backends/vivado/passes/resource_strategy.py @@ -1,6 +1,16 @@ import numpy as np -from hls4ml.model.layers import GRU, LSTM, Conv1D, Conv2D, Dense, SeparableConv1D, SeparableConv2D +from hls4ml.model.layers import ( + GRU, + LSTM, + BidirectionalGRU, + BidirectionalLSTM, + Conv1D, + Conv2D, + Dense, + SeparableConv1D, + SeparableConv2D, +) from hls4ml.model.optimizer import OptimizerPass @@ -8,10 +18,11 @@ class ApplyResourceStrategy(OptimizerPass): '''Transposes the weights to use the dense_resource matrix multiply routine''' def match(self, node): - node_matches = isinstance(node, (Dense, Conv1D, SeparableConv1D, Conv2D, SeparableConv2D, LSTM, GRU)) + node_matches = isinstance( + node, (Dense, Conv1D, SeparableConv1D, Conv2D, SeparableConv2D, LSTM, GRU, BidirectionalLSTM, BidirectionalGRU) + ) is_resource_strategy = node.get_attr('strategy', 
'').lower() in ['resource', 'resource_unrolled'] already_transformed = node.get_attr('_weights_transposed', False) is True - return node_matches and is_resource_strategy and not already_transformed def transform(self, model, node): @@ -37,6 +48,11 @@ def transform(self, model, node): node.weights['pointwise'].data = np.transpose( node.weights['pointwise'].data, axes=[3, 0, 1, 2] ) # (H,W,C,F) => (F,H,W,C) + elif isinstance(node, (BidirectionalLSTM, BidirectionalGRU)): + node.weights['weight'].data = np.transpose(node.weights['weight'].data) + node.weights['recurrent_weight'].data = np.transpose(node.weights['recurrent_weight'].data) + node.weights['weight_b'].data = np.transpose(node.weights['weight_b'].data) + node.weights['recurrent_weight_b'].data = np.transpose(node.weights['recurrent_weight_b'].data) elif isinstance(node, (LSTM, GRU)): node.weights['weight'].data = np.transpose(node.weights['weight'].data) node.weights['recurrent_weight'].data = np.transpose(node.weights['recurrent_weight'].data) From c4697930e1107f9590611c5c4eabb814a75e8917 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 16 May 2025 10:48:43 +0200 Subject: [PATCH 05/26] FIX infer precision for bidirectional rnn --- hls4ml/model/optimizer/passes/infer_precision.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 919bc0c3c2..21c05c98f6 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -81,7 +81,7 @@ def _infer_precision(self, node, types_to_infer): if node_class in ['Embedding']: return self._infer_embedding_precision(node, types_to_infer) - if node_class in ['SimpleRNN', 'LSTM', 'GRU']: + if node_class in ['SimpleRNN', 'LSTM', 'GRU', 'BidirectionalLSTM', 'BidirectionalGRU']: return self._infer_rnn_precision(node, types_to_infer) if node_class in ['ParametrizedActivation']: @@ -553,7 +553,10 @@ def _infer_rnn_precision(self, node, types_to_infer): inferred_types = [] # for now just do the weights and leave the rest for the default catch - for weightvar in ('weight', 'bias', 'recurrent_weight', 'recurrent_bias'): + rnn_weights = ('weight', 'bias', 'recurrent_weight', 'recurrent_bias') + if node.attributes['direction'] == 'bidirectional': + rnn_weights += ('weight_b', 'bias_b', 'recurrent_weight_b', 'recurrent_bias_b') + for weightvar in rnn_weights: if f'{weightvar}_t' in types_to_infer: self._infer_default_type(node, f'{weightvar}_t') node.weights[weightvar].update_precision(node.types[f'{weightvar}_t'].precision) From 4c3d26ee6b421f7798ef964b54ee880991bb3f0f Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 16 May 2025 10:50:20 +0200 Subject: [PATCH 06/26] FIX eliminate activation after bidirectional rnn --- hls4ml/converters/keras_v2_to_hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/converters/keras_v2_to_hls.py b/hls4ml/converters/keras_v2_to_hls.py index f5d3cd1149..14e68c7713 100644 --- a/hls4ml/converters/keras_v2_to_hls.py +++ b/hls4ml/converters/keras_v2_to_hls.py @@ -241,7 +241,7 @@ def parse_keras_model(model_arch, reader): 'HGQ>UnaryLUT', ] # Recurrent layers - recurrent_layers = ['SimpleRNN', 'LSTM', 'GRU', 'QSimpleRNN', 'QLSTM', 'QGRU', 'BLSTM', 'BGRU'] + recurrent_layers = ['SimpleRNN', 'LSTM', 'GRU', 'QSimpleRNN', 'QLSTM', 'QGRU', 'BidirectionalLSTM', 'BidirectionalGRU'] # All supported layers supported_layers = get_supported_keras_layers() + skip_layers From 
5eef679ad0e05c968e309e94131c3dad3989da8a Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 16 May 2025 10:55:55 +0200 Subject: [PATCH 07/26] FIX bidirectional layers name --- .../vivado/passes/recurrent_templates.py | 16 ++++++---------- hls4ml/backends/vivado/vivado_backend.py | 8 +++----- hls4ml/model/layers.py | 12 ++++++------ 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/hls4ml/backends/vivado/passes/recurrent_templates.py b/hls4ml/backends/vivado/passes/recurrent_templates.py index 84fb476483..33fbb5406e 100644 --- a/hls4ml/backends/vivado/passes/recurrent_templates.py +++ b/hls4ml/backends/vivado/passes/recurrent_templates.py @@ -1,10 +1,6 @@ from hls4ml.backends.backend import get_backend from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate -<<<<<<< HEAD -from hls4ml.model.layers import GRU, LSTM, BLSTM, BGRU, TimeDistributed -======= -from hls4ml.model.layers import BGRU, BLSTM, GRU, LSTM ->>>>>>> d2d3b452 (ADD fixes) +from hls4ml.model.layers import GRU, LSTM, BidirectionalLSTM, BidirectionalGRU, TimeDistributed # recurrent multiplication template @@ -247,7 +243,7 @@ def format(self, node): class BidirectionalRecurrentConfigTemplate(LayerConfigTemplate): def __init__(self): - super().__init__((BLSTM, BGRU)) + super().__init__((BidirectionalLSTM, BidirectionalGRU)) self.template = bidir_recr_config_template self.act_template = activ_config_template self.recr_act_template = recr_activ_config_template @@ -275,11 +271,11 @@ def format(self, node): params['static'] = 'true' if node.attributes['static'] else 'false' params['pytorch'] = 'true' if node.get_attr('pytorch', False) else 'false' params['recr_type'] = node.class_name.lower() - params['RECR_TYPE'] = node.class_name[1:] + params['RECR_TYPE'] = node.class_name[13:] - if node.class_name == 'BLSTM': + if node.class_name == 'BidirectionalLSTM': n_recr_mult = 4 - else: # BGRU + else: # BidirectionalGRU n_recr_mult = 3 recr_config = self.template.format(**params) @@ -458,7 +454,7 @@ def format(self, node): class BidirectionalRecurrentFunctionTemplate(FunctionCallTemplate): def __init__(self): - super().__init__((BLSTM, BGRU), include_header=recr_include_list) + super().__init__((BidirectionalLSTM, BidirectionalGRU), include_header=recr_include_list) def format(self, node): params = self._default_function_params(node) diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index d7b58c6e44..a26a1a89c3 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -12,6 +12,8 @@ from hls4ml.model.layers import ( GRU, LSTM, + BidirectionalGRU, + BidirectionalLSTM, Conv1D, Conv2D, Dense, @@ -46,11 +48,7 @@ def __init__(self): def _register_layer_attributes(self): # Add RNN-specific attributes, recurrent_reuse_factor and static implementation - rnn_layers = [ - SimpleRNN, - LSTM, - GRU, - ] + rnn_layers = [SimpleRNN, LSTM, GRU, BidirectionalLSTM, BidirectionalGRU] for layer in rnn_layers: attrs = self.attribute_map.get(layer, []) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index fb151f4c36..e2e93046bc 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1402,8 +1402,8 @@ def initialize(self): self.add_weights_variable(name='recurrent_bias', var_name='br{index}', data=recurrent_bias) -class BLSTM(LSTM): - _expected_attributes = LSTM._expected_attributes + [ +class BidirectionalLSTM(LSTM): + _expected_attributes = [ WeightAttribute('weight_b'), WeightAttribute('bias_b'), 
WeightAttribute('recurrent_weight_b'), @@ -1510,8 +1510,8 @@ def initialize(self): self.add_output_variable(shape, dims) -class BGRU(GRU): - _expected_attributes = GRU._expected_attributes + [ +class BidirectionalGRU(GRU): + _expected_attributes = [ WeightAttribute('weight_b'), WeightAttribute('bias_b'), WeightAttribute('recurrent_weight_b'), @@ -1826,8 +1826,8 @@ def initialize(self): 'SimpleRNN': SimpleRNN, 'LSTM': LSTM, 'GRU': GRU, - 'BLSTM': BLSTM, - 'BGRU': BGRU, + 'BidirectionalLSTM': BidirectionalLSTM, + 'BidirectionalGRU': BidirectionalGRU, 'QSimpleRNN': SimpleRNN, 'QLSTM': LSTM, 'QGRU': GRU, From a9546c76654d335ece4d03a567d83ef5f36bd677 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 16 May 2025 11:09:24 +0200 Subject: [PATCH 08/26] ADD tests for bidirectional layer --- test/pytest/test_rnn.py | 131 +++++++++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 48 deletions(-) diff --git a/test/pytest/test_rnn.py b/test/pytest/test_rnn.py index d2303669fe..85ea9eb8f2 100644 --- a/test/pytest/test_rnn.py +++ b/test/pytest/test_rnn.py @@ -2,7 +2,7 @@ import numpy as np import pytest -from tensorflow.keras.layers import GRU, LSTM, Input, SimpleRNN +from tensorflow.keras.layers import GRU, LSTM, Bidirectional, Input, SimpleRNN from tensorflow.keras.models import Model, Sequential import hls4ml @@ -14,13 +14,21 @@ @pytest.mark.parametrize('rnn_layer', rnn_layers) @pytest.mark.parametrize('return_sequences', [True, False]) -def test_rnn_parsing(rnn_layer, return_sequences): +@pytest.mark.parametrize('bidirectional', [True, False]) +def test_rnn_parsing(rnn_layer, return_sequences, bidirectional): + + if rnn_layer is SimpleRNN and bidirectional: + pytest.skip("SimpleRNN does not support bidirectional layers") + time_steps = 3 input_size = 8 input_shape = (time_steps, input_size) model_input = Input(shape=input_shape) - model_output = rnn_layer(64, return_sequences=return_sequences)(model_input) + if not bidirectional: + model_output = rnn_layer(64, return_sequences=return_sequences)(model_input) + else: + model_output = Bidirectional(rnn_layer(64, return_sequences=return_sequences))(model_input) model = Model(model_input, model_output) model.compile(optimizer='adam', loss='mse') @@ -34,13 +42,26 @@ def test_rnn_parsing(rnn_layer, return_sequences): keras_layer = model.layers[1] # Basic sanity check, I/O, activations - assert hls_layer.class_name == rnn_layer.__name__ - assert hls_layer.attributes['n_out'] == keras_layer.units - assert hls_layer.attributes['activation'] == keras_layer.activation.__name__ - if 'recurrent_activation' in hls_layer.attributes: # SimpleRNN doesn't have this - assert hls_layer.attributes['recurrent_activation'] == keras_layer.recurrent_activation.__name__ - assert hls_layer.get_input_variable().shape == list(input_shape) - assert hls_layer.get_output_variable().shape == model_output.shape.as_list()[1:] # Ignore the batch size + if not bidirectional: + assert hls_layer.class_name == rnn_layer.__name__ + assert hls_layer.attributes['n_out'] == keras_layer.units + assert hls_layer.attributes['activation'] == keras_layer.activation.__name__ + if 'recurrent_activation' in hls_layer.attributes: # SimpleRNN doesn't have this + assert hls_layer.attributes['recurrent_activation'] == keras_layer.recurrent_activation.__name__ + assert hls_layer.get_input_variable().shape == list(input_shape) + assert hls_layer.get_output_variable().shape == model_output.shape.as_list()[1:] # Ignore the batch size + else: + assert hls_layer.class_name == 'Bidirectional' + 
rnn_layer.__name__ + assert hls_layer.attributes['merge_mode'] == keras_layer.merge_mode + if hls_layer.attributes['merge_mode'] == 'concat': + assert hls_layer.attributes['n_out'] == 2 * keras_layer.forward_layer.units + else: + assert hls_layer.attributes['n_out'] == keras_layer.forward_layer.units + assert hls_layer.attributes['activation'] == keras_layer.forward_layer.activation.__name__ + if 'recurrent_activation' in hls_layer.attributes: # SimpleRNN doesn't have this + assert hls_layer.attributes['recurrent_activation'] == keras_layer.forward_layer.recurrent_activation.__name__ + assert hls_layer.get_input_variable().shape == list(input_shape) + assert hls_layer.get_output_variable().shape == model_output.shape.as_list()[1:] # Ignore the batch size # Compare weights hls_weights = list(hls_layer.get_weights()) # [weights, recurrent_weights, bias, recurrent_bias] @@ -66,54 +87,66 @@ def test_rnn_parsing(rnn_layer, return_sequences): @pytest.mark.parametrize( - 'rnn_layer, backend, io_type, strategy', + 'rnn_layer, bidirectional, backend, io_type, strategy', [ - (SimpleRNN, 'Quartus', 'io_parallel', 'resource'), - (SimpleRNN, 'oneAPI', 'io_parallel', 'resource'), - (LSTM, 'Vivado', 'io_parallel', 'resource'), - (LSTM, 'Vivado', 'io_parallel', 'latency'), - (LSTM, 'Vitis', 'io_parallel', 'resource'), - (LSTM, 'Vitis', 'io_parallel', 'latency'), - (LSTM, 'Quartus', 'io_parallel', 'resource'), - (LSTM, 'oneAPI', 'io_parallel', 'resource'), - (LSTM, 'Vivado', 'io_stream', 'resource'), - (LSTM, 'Vivado', 'io_stream', 'latency'), - (LSTM, 'Vitis', 'io_stream', 'resource'), - (LSTM, 'Vitis', 'io_stream', 'latency'), - (GRU, 'Vivado', 'io_parallel', 'resource'), - (GRU, 'Vivado', 'io_parallel', 'latency'), - (GRU, 'Vitis', 'io_parallel', 'resource'), - (GRU, 'Vitis', 'io_parallel', 'latency'), - (GRU, 'Quartus', 'io_parallel', 'resource'), - (GRU, 'oneAPI', 'io_parallel', 'resource'), - (GRU, 'Vivado', 'io_stream', 'resource'), - (GRU, 'Vivado', 'io_stream', 'latency'), - (GRU, 'Vitis', 'io_stream', 'resource'), - (GRU, 'Vitis', 'io_stream', 'latency'), - (GRU, 'Quartus', 'io_stream', 'resource'), - (GRU, 'oneAPI', 'io_stream', 'resource'), + (SimpleRNN, False, 'Quartus', 'io_parallel', 'resource'), + (SimpleRNN, False, 'oneAPI', 'io_parallel', 'resource'), + (LSTM, False, 'Vivado', 'io_parallel', 'resource'), + (LSTM, False, 'Vivado', 'io_parallel', 'latency'), + (LSTM, False, 'Vitis', 'io_parallel', 'resource'), + (LSTM, False, 'Vitis', 'io_parallel', 'latency'), + (LSTM, True, 'Vivado', 'io_parallel', 'resource'), + (LSTM, True, 'Vivado', 'io_parallel', 'latency'), + (LSTM, True, 'Vitis', 'io_parallel', 'resource'), + (LSTM, True, 'Vitis', 'io_parallel', 'latency'), + (LSTM, False, 'Quartus', 'io_parallel', 'resource'), + (LSTM, False, 'oneAPI', 'io_parallel', 'resource'), + (LSTM, False, 'Vivado', 'io_stream', 'resource'), + (LSTM, False, 'Vivado', 'io_stream', 'latency'), + (LSTM, False, 'Vitis', 'io_stream', 'resource'), + (LSTM, False, 'Vitis', 'io_stream', 'latency'), + (GRU, False, 'Vivado', 'io_parallel', 'resource'), + (GRU, False, 'Vivado', 'io_parallel', 'latency'), + (GRU, False, 'Vitis', 'io_parallel', 'resource'), + (GRU, False, 'Vitis', 'io_parallel', 'latency'), + (GRU, True, 'Vivado', 'io_parallel', 'resource'), + (GRU, True, 'Vivado', 'io_parallel', 'latency'), + (GRU, True, 'Vitis', 'io_parallel', 'resource'), + (GRU, True, 'Vitis', 'io_parallel', 'latency'), + (GRU, False, 'Quartus', 'io_parallel', 'resource'), + (GRU, False, 'oneAPI', 'io_parallel', 'resource'), + (GRU, 
False, 'Vivado', 'io_stream', 'resource'), + (GRU, False, 'Vivado', 'io_stream', 'latency'), + (GRU, False, 'Vitis', 'io_stream', 'resource'), + (GRU, False, 'Vitis', 'io_stream', 'latency'), + (GRU, False, 'Quartus', 'io_stream', 'resource'), + (GRU, False, 'oneAPI', 'io_stream', 'resource'), ], ) @pytest.mark.parametrize('return_sequences', [True, False]) @pytest.mark.parametrize('static', [True, False]) -def test_rnn_accuracy(rnn_layer, return_sequences, backend, io_type, strategy, static): +def test_rnn_accuracy(rnn_layer, bidirectional, return_sequences, backend, io_type, strategy, static): # Subtract 0.5 to include negative values input_shape = (12, 8) X = np.random.rand(50, *input_shape) - 0.5 - layer_name = rnn_layer.__name__ + layer_name = ("Bidirectional" if bidirectional else "") + rnn_layer.__name__ keras_model = Sequential() - keras_model.add( - rnn_layer( - units=32, - input_shape=input_shape, - kernel_initializer='lecun_uniform', - recurrent_initializer='lecun_uniform', - bias_initializer='lecun_uniform', - return_sequences=return_sequences, - name=layer_name, - ) + keras_model.add(Input(shape=input_shape)) + test_layer = rnn_layer( + units=32, + input_shape=input_shape, + kernel_initializer='lecun_uniform', + recurrent_initializer='lecun_uniform', + bias_initializer='lecun_uniform', + return_sequences=return_sequences, + name=layer_name, ) + if not bidirectional: + keras_model.add(test_layer) + else: + keras_model.add(Bidirectional(test_layer, name=layer_name)) + keras_model.compile() default_precision = 'ap_fixed<32, 16>' if backend in ['Vivado', 'Vitis'] else 'ac_fixed<32, 16, true>' @@ -123,7 +156,9 @@ def test_rnn_accuracy(rnn_layer, return_sequences, backend, io_type, strategy, s hls_config['LayerName'][layer_name]['static'] = static hls_config['LayerName'][layer_name]['Strategy'] = strategy prj_name = ( - f'hls4mlprj_rnn_accuracy_{layer_name}_static_{int(static)}_ret_seq_{int(return_sequences)}_' + 'hls4mlprj_rnn_accuracy_' + + ('bidirectional_' if bidirectional else '') + + f'{layer_name}_static_{int(static)}_ret_seq_{int(return_sequences)}_' f'{backend}_{io_type}_{strategy}' ) output_dir = str(test_root_path / prj_name) From 0246dae190868dcb82c410df7ae8a57776bffe37 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 16 May 2025 11:35:05 +0200 Subject: [PATCH 09/26] FIX weight name and ADD backward layer architecture check --- hls4ml/converters/keras/recurrent.py | 29 ++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/hls4ml/converters/keras/recurrent.py b/hls4ml/converters/keras/recurrent.py index cf3a3e8707..be04619d0b 100644 --- a/hls4ml/converters/keras/recurrent.py +++ b/hls4ml/converters/keras/recurrent.py @@ -116,15 +116,19 @@ def parse_time_distributed_layer(keras_layer, input_names, input_shapes, data_re @keras_handler('Bidirectional') def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reader): assert keras_layer['class_name'] == 'Bidirectional' - + rnn_layer = keras_layer['config']['layer'] assert rnn_layer['class_name'] in rnn_layers or rnn_layer['class_name'][1:] in rnn_layers layer = parse_default_keras_layer(rnn_layer, input_names) layer['name'] = keras_layer['config']['name'] - layer['class_name'] = 'B' + layer['class_name'] + layer['class_name'] = 'Bidirectional' + layer['class_name'] layer['direction'] = 'bidirectional' + # TODO Should we handle different architectures for forward and backward layer? 
+ if keras_layer['config'].get('backward_layer'): + raise Exception('Different architectures between forward and backward layers are not supported by hls4ml') + layer['return_sequences'] = rnn_layer['config']['return_sequences'] layer['return_state'] = rnn_layer['config']['return_state'] @@ -147,19 +151,28 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade if keras_layer['config']['merge_mode'] == 'concat': layer['n_out'] *= 2 + rnn_layer_name = rnn_layer['config']['name'] if 'SimpleRNN' in layer['class_name']: cell_name = 'simple_rnn' else: cell_name = rnn_layer['class_name'].lower() layer['weight_data'], layer['recurrent_weight_data'], layer['bias_data'] = get_weights_data( - data_reader, layer['name'], [f'{cell_name}_cell/kernel', - f'{cell_name}_cell/recurrent_kernel', - f'{cell_name}_cell/bias'] + data_reader, + layer['name'], + [ + f'forward_{rnn_layer_name}/{cell_name}_cell/kernel', + f'forward_{rnn_layer_name}/{cell_name}_cell/recurrent_kernel', + f'forward_{rnn_layer_name}/{cell_name}_cell/bias', + ], ) layer['weight_b_data'], layer['recurrent_weight_b_data'], layer['bias_b_data'] = get_weights_data( - data_reader, layer['name'], [f'{cell_name}_cell/kernel', - f'{cell_name}_cell/recurrent_kernel', - f'{cell_name}_cell/bias'] + data_reader, + layer['name'], + [ + f'backward_{rnn_layer_name}/{cell_name}_cell/kernel', + f'backward_{rnn_layer_name}/{cell_name}_cell/recurrent_kernel', + f'backward_{rnn_layer_name}/{cell_name}_cell/bias', + ], ) if 'GRU' in layer['class_name']: From 7428af716472922762aff8b2b93af448ddac1dc1 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 16 May 2025 12:25:49 +0200 Subject: [PATCH 10/26] FIX static and non-static Bidirectional layers --- .../vivado/nnet_utils/nnet_recurrent.h | 285 ++++++++++++++---- 1 file changed, 231 insertions(+), 54 deletions(-) diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h index 2fd4af4b29..ba988c44e2 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h @@ -36,7 +36,7 @@ struct lstm_config { template using activation = nnet::activation::relu; }; -struct blstm_config : lstm_config{ +struct bidirectionallstm_config : lstm_config { // Internal data type definitions typedef float weight_b_t; typedef float recurrent_weight_b_t; @@ -119,7 +119,7 @@ void lstm(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[CONFIG } } -template +template void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[CONFIG_T::n_state], res_T s_newstate[CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], @@ -197,6 +197,33 @@ void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate } } +/* Alternative lstm_static beginning +template +void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[CONFIG_T::n_state], + res_T s_newstate[CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], + bool backward_selector=false) { + // Initialize the state variable -- will maintain state between function calls + + static res_T h_state_forward[CONFIG_T::n_state]; + static res_T s_state_forward[CONFIG_T::n_state]; + 
res_T *h_state; + res_T *s_state; + if constexpr (bidirectional) { + static res_T h_state_backward[CONFIG_T::n_state]; + static res_T s_state_backward[CONFIG_T::n_state]; + h_state = backward_selector ? h_state_backward : h_state_forward; + s_state = backward_selector ? s_state_backward : s_state_forward; + } + else { + h_state = h_state_forward; + s_state = s_state_forward; + } +*/ + template void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], @@ -242,17 +269,17 @@ void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CO } } - template -void blstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], - typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], - typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { +void bidirectionallstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], + res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], + typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { res_T h_newstate[CONFIG_T::n_state]; res_T s_newstate[CONFIG_T::n_state]; @@ -273,45 +300,45 @@ void blstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[C s_newstate[ii] = 0; h_newstate_back[ii] = 0; s_newstate_back[ii] = 0; - } for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { for (int j = 0; j < CONFIG_T::n_in; j++) { #pragma HLS UNROLL - data_in[j] = data[j + iloop * CONFIG_T::n_in]; - data_in_back[j] = data[CONFIG_T::n_in -1 -j + iloop * CONFIG_T::n_in]; - //printf("%u", j + iloop * CONFIG_T::n_in); - //printf("%u", CONFIG_T::n_in -1 -j + iloop * CONFIG_T::n_in); + data_in[j] = data[j + iloop * CONFIG_T::n_in]; + data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; } if (CONFIG_T::use_static) { nnet::lstm_static(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, param_br); - nnet::lstm_static(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, param_r_back, param_b_back, - param_br_back); - } - else { + nnet::lstm_static(reset_state, data_in_back, h_newstate_back, s_newstate_back, + param_back, param_r_back, param_b_back, param_br_back); + } else { nnet::lstm(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, param_br); - nnet::lstm(reset_state, data_in_back, 
h_newstate_back, s_newstate_back, param_back, param_r_back, param_b_back, - param_br_back); + nnet::lstm(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, + param_r_back, param_b_back, param_br_back); } - if (CONFIG_T::n_sequence_out > 1) + if (CONFIG_T::n_sequence_out > 1) { for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { #pragma HLS UNROLL res[i] = h_newstate[j]; - res[i+CONFIG_T::n_state] = h_newstate_back[j]; } + for (int i = CONFIG_T::n_state * (2 * (CONFIG_T::n_sequence - iloop) - 1), j = 0; + i < CONFIG_T::n_state * 2 * (CONFIG_T::n_sequence - iloop); i++, j++) { + #pragma HLS UNROLL + res[i] = h_newstate_back[j]; + } + } reset_state = false; } if (CONFIG_T::n_sequence_out == 1) for (int i = 0; i < (CONFIG_T::n_state); i++) { #pragma HLS UNROLL res[i] = h_newstate[i]; - res[i+CONFIG_T::n_state] = h_newstate_back[i]; + res[i + CONFIG_T::n_state] = h_newstate_back[i]; } } - template void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newstate[CONFIG_T::n_state], s_T s_newstate[CONFIG_T::n_state], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_state], @@ -348,21 +375,22 @@ void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newsta } template -void blstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newstate[CONFIG_T::n_state], - s_T s_newstate[CONFIG_T::n_state], h_T h_newstate_back[CONFIG_T::n_state], - s_T s_newstate_back[CONFIG_T::n_state], res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], - typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], - typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { +void bidirectionallstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newstate[CONFIG_T::n_state], + s_T s_newstate[CONFIG_T::n_state], h_T h_newstate_back[CONFIG_T::n_state], + s_T s_newstate_back[CONFIG_T::n_state], + res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], + typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { data_T data_in[CONFIG_T::n_in]; data_T data_in_back[CONFIG_T::n_in]; - bool reset_state = true; + bool reset_state = false; #pragma HLS ARRAY_PARTITION variable=h_newstate complete #pragma HLS ARRAY_PARTITION variable=s_newstate complete @@ -372,26 +400,30 @@ void blstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newst for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { for (int j = 0; j 
< CONFIG_T::n_in; j++) { #pragma HLS UNROLL - data_in[j] = data[j + iloop * CONFIG_T::n_in]; - data_in_back[j] = data[CONFIG_T::n_in -1 -j + iloop * CONFIG_T::n_in]; + data_in[j] = data[j + iloop * CONFIG_T::n_in]; + data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; } - nnet::lstm(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, - param_br); - nnet::lstm(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, param_r_back, param_b_back, - param_br_back); - if (CONFIG_T::n_sequence_out > 1) - for (int i = CONFIG_T::n_state *2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { + nnet::lstm(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, param_br); + nnet::lstm(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, + param_r_back, param_b_back, param_br_back); + if (CONFIG_T::n_sequence_out > 1) { + for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { #pragma HLS UNROLL res[i] = h_newstate[j]; - res[i+CONFIG_T::n_state] = h_newstate_back[j]; } + for (int i = CONFIG_T::n_state * (2 * (CONFIG_T::n_sequence - iloop) - 1), j = 0; + i < CONFIG_T::n_state * 2 * (CONFIG_T::n_sequence - iloop); i++, j++) { + #pragma HLS UNROLL + res[i] = h_newstate_back[j]; + } + } reset_state = false; } if (CONFIG_T::n_sequence_out == 1) for (int i = 0; i < (CONFIG_T::n_state); i++) { #pragma HLS UNROLL res[i] = h_newstate[i]; - res[i+CONFIG_T::n_state] = h_newstate_back[i]; + res[i + CONFIG_T::n_state] = h_newstate_back[i]; } } @@ -458,8 +490,9 @@ void lstm_stack(hls::stream &data_stream, hls::stream &res_stream } } +/* BiDirectional LSTM io_stream implementation: not implemented yet template -void blstm_stack(hls::stream &data_stream, hls::stream &res_stream, +void bidirectionallstm_stack(hls::stream &data_stream, hls::stream &res_stream, typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], @@ -468,7 +501,7 @@ void blstm_stack(hls::stream &data_stream, hls::stream &res_strea typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { - + typename res_T::value_type h_newstate[CONFIG_T::n_state]; typename res_T::value_type s_newstate[CONFIG_T::n_state]; typename res_T::value_type h_newstate_back[CONFIG_T::n_state]; @@ -531,6 +564,7 @@ void blstm_stack(hls::stream &data_stream, hls::stream &res_strea res_stream.write(res_pack); } } +*/ // Struct for the GRU template @@ -562,6 +596,14 @@ struct gru_config { template using activation = nnet::activation::relu; }; +struct bidirectionalgru_config : gru_config { + // Internal data type definitions + typedef float weight_b_t; + typedef float recurrent_weight_b_t; + typedef float bias_b_t; + typedef float recurrent_bias_b_t; +}; + template void gru(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], // TODO - Check the layout of the param @@ -635,15 +677,14 @@ void gru(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[CONFIG_ } } -template +template void gru_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T 
h_newstate[CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3]) { - // Initialize the state variable -- will maintain state between function calls - static res_T h_state[CONFIG_T::n_state]; + // Initialize the state variable -- will maintain state between function calls typename CONFIG_T::accum_t tmpres[CONFIG_T::n_state * 3]; typename CONFIG_T::accum_t tmpres_state_zr[CONFIG_T::n_state * 3]; typename CONFIG_T::accum_t tmpres_state_h[CONFIG_T::n_state]; @@ -719,6 +760,27 @@ void gru_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[ } } +/* Alternative gru_static beginning +template +void gru_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3], + bool backward_selector=false) { + // Initialize the state variable -- will maintain state between function calls + + static res_T h_state_forward[CONFIG_T::n_state]; + res_T *h_state; + if constexpr (bidirectional) { + static res_T h_state_backward[CONFIG_T::n_state]; + h_state = backward_selector ? h_state_backward : h_state_forward; + } + else { + h_state = h_state_forward; + } +*/ + template void gru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], @@ -760,6 +822,70 @@ void gru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CON } } +template +void bidirectionalgru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], + res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3], + typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 3 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_b_t param_zr_back[CONFIG_T::n_state * 3 * CONFIG_T::n_state], + typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 3], + typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 3]) { + + res_T h_state[CONFIG_T::n_state]; + data_T data_in[CONFIG_T::n_in]; + res_T h_state_back[CONFIG_T::n_state]; + data_T data_in_back[CONFIG_T::n_in]; + bool reset_state = true; + + #pragma HLS ARRAY_PARTITION variable=h_state complete + #pragma HLS ARRAY_PARTITION variable=data_in complete + #pragma HLS ARRAY_PARTITION variable=h_state_back complete + #pragma HLS ARRAY_PARTITION variable=data_in_back complete + + for (int ii = 0; ii < CONFIG_T::n_state; ii++) { + #pragma HLS UNROLL + h_state[ii] = 0; + h_state_back[ii] = 0; + } + for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { + for (int j = 0; j < CONFIG_T::n_in; j++) { + #pragma HLS UNROLL + data_in[j] = data[j + iloop * CONFIG_T::n_in]; + data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * 
CONFIG_T::n_in]; + } + if (CONFIG_T::use_static) { + nnet::gru_static(reset_state, data_in, h_state, param, param_zr, param_b, param_br); + nnet::gru_static(reset_state, data_in_back, h_state_back, param_back, param_zr_back, + param_b_back, param_br_back); + } else { + nnet::gru(reset_state, data_in, h_state, param, param_zr, param_b, param_br); + nnet::gru(reset_state, data_in_back, h_state_back, param_back, param_zr_back, + param_b_back, param_br_back); + } + if (CONFIG_T::n_sequence_out > 1) { + for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { + #pragma HLS UNROLL + res[i] = h_state[j]; + } + for (int i = CONFIG_T::n_state * (2 * (CONFIG_T::n_sequence - iloop) - 1), j = 0; + i < CONFIG_T::n_state * 2 * (CONFIG_T::n_sequence - iloop); i++, j++) { + #pragma HLS UNROLL + res[i] = h_state_back[j]; + } + } + reset_state = false; + } + if (CONFIG_T::n_sequence_out == 1) + for (int i = 0; i < (CONFIG_T::n_state); i++) { + #pragma HLS UNROLL + res[i] = h_state[i]; + res[i + CONFIG_T::n_state] = h_state_back[i]; + } +} + template void gru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_state[CONFIG_T::n_state], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_state], @@ -795,6 +921,57 @@ void gru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_state[C } } +template +void bidirectionalgru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_state[CONFIG_T::n_state], + h_T h_state_back[CONFIG_T::n_state], res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3], + typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 3 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_b_t param_zr_back[CONFIG_T::n_state * 3 * CONFIG_T::n_state], + typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 3], + typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 3]) { + + data_T data_in[CONFIG_T::n_in]; + data_T data_in_back[CONFIG_T::n_in]; + bool reset_state = false; + + #pragma HLS ARRAY_PARTITION variable=h_state complete + #pragma HLS ARRAY_PARTITION variable=data_in complete + #pragma HLS ARRAY_PARTITION variable=h_state_back complete + #pragma HLS ARRAY_PARTITION variable=data_in_back complete + + for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { + for (int j = 0; j < CONFIG_T::n_in; j++) { + #pragma HLS UNROLL + data_in[j] = data[j + iloop * CONFIG_T::n_in]; + data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; + } + nnet::gru(reset_state, data_in, h_state, param, param_zr, param_b, param_br); + nnet::gru(reset_state, data_in_back, h_state_back, param_back, param_zr_back, param_b_back, + param_br_back); + if (CONFIG_T::n_sequence_out > 1) { + for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { + #pragma HLS UNROLL + res[i] = h_state[j]; + } + for (int i = CONFIG_T::n_state * (2 * (CONFIG_T::n_sequence - iloop) - 1), j = 0; + i < CONFIG_T::n_state * 2 * (CONFIG_T::n_sequence - iloop); i++, j++) { + #pragma HLS UNROLL + res[i] = h_state_back[j]; + } + } + reset_state = false; + } + if (CONFIG_T::n_sequence_out == 1) + for (int i = 0; i < (CONFIG_T::n_state); i++) { + #pragma HLS 
UNROLL + res[i] = h_state[i]; + res[i + CONFIG_T::n_state] = h_state_back[i]; + } +} + template void gru_stack(hls::stream &data_stream, hls::stream &res_stream, typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], From d2c6cc0417d102821be65a85b13ea3ef951fb3ed Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 16 May 2025 17:09:31 +0200 Subject: [PATCH 11/26] ADD parse general bidirectional layer with possibly different architectures --- hls4ml/converters/keras/recurrent.py | 111 +++++++++++++-------------- 1 file changed, 55 insertions(+), 56 deletions(-) diff --git a/hls4ml/converters/keras/recurrent.py b/hls4ml/converters/keras/recurrent.py index be04619d0b..97f4cd152c 100644 --- a/hls4ml/converters/keras/recurrent.py +++ b/hls4ml/converters/keras/recurrent.py @@ -117,75 +117,74 @@ def parse_time_distributed_layer(keras_layer, input_names, input_shapes, data_re def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reader): assert keras_layer['class_name'] == 'Bidirectional' - rnn_layer = keras_layer['config']['layer'] - assert rnn_layer['class_name'] in rnn_layers or rnn_layer['class_name'][1:] in rnn_layers - - layer = parse_default_keras_layer(rnn_layer, input_names) - layer['name'] = keras_layer['config']['name'] - layer['class_name'] = 'Bidirectional' + layer['class_name'] - layer['direction'] = 'bidirectional' - - # TODO Should we handle different architectures for forward and backward layer? + rnn_forward_layer = keras_layer['config']['layer'] if keras_layer['config'].get('backward_layer'): - raise Exception('Different architectures between forward and backward layers are not supported by hls4ml') - - layer['return_sequences'] = rnn_layer['config']['return_sequences'] - layer['return_state'] = rnn_layer['config']['return_state'] - - if 'SimpleRNN' not in layer['class_name']: - layer['recurrent_activation'] = rnn_layer['config']['recurrent_activation'] + rnn_backward_layer = keras_layer['config']['backward_layer'] + if rnn_forward_layer['config']['go_backwards']: + temp_layer = rnn_forward_layer.copy() + rnn_forward_layer = rnn_backward_layer.copy() + rnn_backward_layer = temp_layer + else: + rnn_backward_layer = rnn_forward_layer - layer['time_major'] = rnn_layer['config']['time_major'] if 'time_major' in rnn_layer['config'] else False + assert (rnn_forward_layer['class_name'] in rnn_layers or rnn_forward_layer['class_name'][1:] in rnn_layers) and ( + rnn_backward_layer['class_name'] in rnn_layers or rnn_backward_layer['class_name'][1:] in rnn_layers + ) + layer = {} + layer['name'] = keras_layer['config']['name'] + layer['forward_layer'] = parse_default_keras_layer(rnn_forward_layer, input_names) + layer['backward_layer'] = parse_default_keras_layer(rnn_backward_layer, input_names) + layer['class_name'] = ( + 'Bidirectional' + layer['forward_layer']['class_name'] + ) # TODO: to be changed if we ever implement different + # architecture for forward and backward layer + layer['direction'] = 'bidirectional' + layer['return_sequences'] = rnn_forward_layer['config']['return_sequences'] + layer['return_state'] = rnn_forward_layer['config']['return_state'] + layer['time_major'] = rnn_forward_layer['config']['time_major'] if 'time_major' in rnn_forward_layer['config'] else False # TODO Should we handle time_major? 
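+    # A minimal sketch of the wrapped layers this general parser is meant to cover
+    # (hypothetical names and sizes, for illustration only):
+    #
+    #     fwd = LSTM(32, name='lstm_f')
+    #     bwd = LSTM(16, name='lstm_b', go_backwards=True)
+    #     model.add(Bidirectional(fwd, backward_layer=bwd, name='bidir'))
+    #
+    # The per-direction loop below then looks up the weights under names such as
+    # 'forward_lstm_f/lstm_cell/kernel' and 'backward_lstm_b/lstm_cell/recurrent_kernel'.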
if layer['time_major']: raise Exception('Time-major format is not supported by hls4ml') - layer['n_timesteps'] = input_shapes[0][1] layer['n_in'] = input_shapes[0][2] - assert keras_layer['config']['merge_mode'] in merge_modes layer['merge_mode'] = keras_layer['config']['merge_mode'] - layer['n_out'] = rnn_layer['config']['units'] - if keras_layer['config']['merge_mode'] == 'concat': - layer['n_out'] *= 2 + for direction, rnn_layer in [('forward_layer', rnn_forward_layer), ('backward_layer', rnn_backward_layer)]: - rnn_layer_name = rnn_layer['config']['name'] - if 'SimpleRNN' in layer['class_name']: - cell_name = 'simple_rnn' - else: - cell_name = rnn_layer['class_name'].lower() - layer['weight_data'], layer['recurrent_weight_data'], layer['bias_data'] = get_weights_data( - data_reader, - layer['name'], - [ - f'forward_{rnn_layer_name}/{cell_name}_cell/kernel', - f'forward_{rnn_layer_name}/{cell_name}_cell/recurrent_kernel', - f'forward_{rnn_layer_name}/{cell_name}_cell/bias', - ], - ) - layer['weight_b_data'], layer['recurrent_weight_b_data'], layer['bias_b_data'] = get_weights_data( - data_reader, - layer['name'], - [ - f'backward_{rnn_layer_name}/{cell_name}_cell/kernel', - f'backward_{rnn_layer_name}/{cell_name}_cell/recurrent_kernel', - f'backward_{rnn_layer_name}/{cell_name}_cell/bias', - ], - ) - - if 'GRU' in layer['class_name']: - layer['apply_reset_gate'] = 'after' if rnn_layer['config']['reset_after'] else 'before' + if 'SimpleRNN' not in rnn_layer['class_name']: + layer[direction]['recurrent_activation'] = rnn_layer['config']['recurrent_activation'] - # biases array is actually a 2-dim array of arrays (bias + recurrent bias) - # both arrays have shape: n_units * 3 (z, r, h_cand) - biases = layer['bias_data'] - biases_b = layer['bias_b_data'] - layer['bias_data'] = biases[0] - layer['recurrent_bias_data'] = biases[1] - layer['bias_b_data'] = biases_b[0] - layer['recurrent_bias_b_data'] = biases_b[1] + rnn_layer_name = rnn_layer['config']['name'] + if 'SimpleRNN' in layer['class_name']: + cell_name = 'simple_rnn' + else: + cell_name = rnn_layer['class_name'].lower() + layer[direction]['weight_data'], layer[direction]['recurrent_weight_data'], layer[direction]['bias_data'] = ( + get_weights_data( + data_reader, + layer['name'], + [ + f'{direction[:-6]}_{rnn_layer_name}/{cell_name}_cell/kernel', + f'{direction[:-6]}_{rnn_layer_name}/{cell_name}_cell/recurrent_kernel', + f'{direction[:-6]}_{rnn_layer_name}/{cell_name}_cell/bias', + ], + ) + ) + + if 'GRU' in rnn_layer['class_name']: + layer[direction]['apply_reset_gate'] = 'after' if rnn_layer['config']['reset_after'] else 'before' + + # biases array is actually a 2-dim array of arrays (bias + recurrent bias) + # both arrays have shape: n_units * 3 (z, r, h_cand) + biases = layer[direction]['bias_data'] + layer[direction]['bias_data'] = biases[0] + layer[direction]['recurrent_bias_data'] = biases[1] + + layer[direction]['n_states'] = rnn_layer['config']['units'] + + layer['n_out'] = layer['forward_layer']['n_states'] + layer['backward_layer']['n_states'] if layer['return_sequences']: output_shape = [input_shapes[0][0], layer['n_timesteps'], layer['n_out']] From edf7cdfa640ef07356ae73e4163b1b81eeba5c7c Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Mon, 19 May 2025 17:33:29 +0200 Subject: [PATCH 12/26] ADD paring for general bidirectional layer --- hls4ml/converters/keras/recurrent.py | 46 +++++++++++++++++----------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/hls4ml/converters/keras/recurrent.py 
b/hls4ml/converters/keras/recurrent.py index 97f4cd152c..6b2b5b6270 100644 --- a/hls4ml/converters/keras/recurrent.py +++ b/hls4ml/converters/keras/recurrent.py @@ -133,12 +133,10 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade layer = {} layer['name'] = keras_layer['config']['name'] - layer['forward_layer'] = parse_default_keras_layer(rnn_forward_layer, input_names) - layer['backward_layer'] = parse_default_keras_layer(rnn_backward_layer, input_names) - layer['class_name'] = ( - 'Bidirectional' + layer['forward_layer']['class_name'] - ) # TODO: to be changed if we ever implement different - # architecture for forward and backward layer + layer['class_name'] = keras_layer['class_name'] + if input_names is not None: + layer['inputs'] = input_names + layer['direction'] = 'bidirectional' layer['return_sequences'] = rnn_forward_layer['config']['return_sequences'] layer['return_state'] = rnn_forward_layer['config']['return_state'] @@ -151,40 +149,52 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade assert keras_layer['config']['merge_mode'] in merge_modes layer['merge_mode'] = keras_layer['config']['merge_mode'] - for direction, rnn_layer in [('forward_layer', rnn_forward_layer), ('backward_layer', rnn_backward_layer)]: + for direction, rnn_layer in [('forward', rnn_forward_layer), ('backward', rnn_backward_layer)]: + + layer[f'{direction}_name'] = rnn_layer['config']['name'] + layer[f'{direction}_class_name'] = rnn_layer['class_name'] + + layer[f'{direction}_data_format'] = rnn_layer['config'].get('data_format', 'channels_last') + + if 'activation' in rnn_layer['config']: + layer[f'{direction}_activation'] = rnn_layer['config']['activation'] + if 'epsilon' in rnn_layer['config']: + layer[f'{direction}_epsilon'] = rnn_layer['config']['epsilon'] + if 'use_bias' in rnn_layer['config']: + layer[f'{direction}_use_bias'] = rnn_layer['config']['use_bias'] if 'SimpleRNN' not in rnn_layer['class_name']: - layer[direction]['recurrent_activation'] = rnn_layer['config']['recurrent_activation'] + layer[f'{direction}_recurrent_activation'] = rnn_layer['config']['recurrent_activation'] rnn_layer_name = rnn_layer['config']['name'] if 'SimpleRNN' in layer['class_name']: cell_name = 'simple_rnn' else: cell_name = rnn_layer['class_name'].lower() - layer[direction]['weight_data'], layer[direction]['recurrent_weight_data'], layer[direction]['bias_data'] = ( + layer[f'{direction}_weight_data'], layer[f'{direction}_recurrent_weight_data'], layer[f'{direction}_bias_data'] = ( get_weights_data( data_reader, layer['name'], [ - f'{direction[:-6]}_{rnn_layer_name}/{cell_name}_cell/kernel', - f'{direction[:-6]}_{rnn_layer_name}/{cell_name}_cell/recurrent_kernel', - f'{direction[:-6]}_{rnn_layer_name}/{cell_name}_cell/bias', + f'{direction}_{rnn_layer_name}/{cell_name}_cell/kernel', + f'{direction}_{rnn_layer_name}/{cell_name}_cell/recurrent_kernel', + f'{direction}_{rnn_layer_name}/{cell_name}_cell/bias', ], ) ) if 'GRU' in rnn_layer['class_name']: - layer[direction]['apply_reset_gate'] = 'after' if rnn_layer['config']['reset_after'] else 'before' + layer[f'{direction}_apply_reset_gate'] = 'after' if rnn_layer['config']['reset_after'] else 'before' # biases array is actually a 2-dim array of arrays (bias + recurrent bias) # both arrays have shape: n_units * 3 (z, r, h_cand) - biases = layer[direction]['bias_data'] - layer[direction]['bias_data'] = biases[0] - layer[direction]['recurrent_bias_data'] = biases[1] + biases = layer[f'{direction}_bias_data'] 
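+            # For illustration (hypothetical size): a GRU with units=32 and
+            # reset_after=True stores its bias as a (2, 96) array, so row 0 below
+            # becomes the input-side bias and row 1 the recurrent bias.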
+ layer[f'{direction}_bias_data'] = biases[0] + layer[f'{direction}_recurrent_bias_data'] = biases[1] - layer[direction]['n_states'] = rnn_layer['config']['units'] + layer[f'{direction}_n_states'] = rnn_layer['config']['units'] - layer['n_out'] = layer['forward_layer']['n_states'] + layer['backward_layer']['n_states'] + layer['n_out'] = layer['forward_n_states'] + layer['backward_n_states'] if layer['return_sequences']: output_shape = [input_shapes[0][0], layer['n_timesteps'], layer['n_out']] From d882310b4ab0491613cacfda2d9a107d4b27fb15 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Wed, 28 May 2025 14:01:17 +0200 Subject: [PATCH 13/26] ADD gnerale bidirectional wrapper --- hls4ml/model/layers.py | 77 +++++ .../vivado/nnet_utils/nnet_recurrent.h | 305 +++++++++++++++++- 2 files changed, 380 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index e2e93046bc..f24a5bf273 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1538,6 +1538,82 @@ def initialize(self): self.add_weights_variable(name='recurrent_bias_b', var_name='br_b{index}') +class Bidirectional(Layer): + _expected_attributes = [ + Attribute('n_out'), + Attribute('return_sequences', value_type=bool, default=False), + Attribute('return_state', value_type=bool, default=False), + Attribute('pass_initial_states', value_type=bool, default=False), + Attribute('time_major', value_type=bool, default=False), + Attribute('forward_activation', value_type=str), + Attribute('forward_recurrent_activation', value_type=str), + WeightAttribute('forward_weight'), + WeightAttribute('forward_bias'), + WeightAttribute('forward_recurrent_weight'), + WeightAttribute('forward_recurrent_bias'), + TypeAttribute('forward_weight'), + TypeAttribute('forward_bias'), + TypeAttribute('forward_recurrent_weight'), + TypeAttribute('forward_recurrent_bias'), + Attribute('backward_activation', value_type=str), + Attribute('backward_recurrent_activation', value_type=str), + WeightAttribute('backward_weight'), + WeightAttribute('backward_bias'), + WeightAttribute('backward_recurrent_weight'), + WeightAttribute('backward_recurrent_bias'), + TypeAttribute('backward_weight'), + TypeAttribute('backward_bias'), + TypeAttribute('backward_recurrent_weight'), + TypeAttribute('backward_recurrent_bias'), + ] + + def initialize(self): + if self.attributes['return_sequences']: + shape = [self.attributes['n_timesteps'], self.attributes['n_out']] + dims = [f'N_TIME_STEPS_{self.index}', f'N_OUT_{self.index}'] + else: + shape = [self.attributes['n_out']] + dims = [f'N_OUT_{self.index}'] + + self.add_output_variable(shape, dims) + + if self.attributes['return_state']: + state_shape = [self.attributes['n_out']] + state_dims = [f'N_OUT_{self.index}'] + self.add_output_variable( + state_shape, state_dims, out_name=self.outputs[1], var_name='layer{index}_h', type_name='layer{index}_h_t' + ) + self.add_output_variable( + state_shape, state_dims, out_name=self.outputs[2], var_name='layer{index}_c', type_name='layer{index}_c_t' + ) + + for dir in ['forward', 'backward']: + # weights + self.add_weights_variable(name=f'{dir}_weight', var_name=(f'w_{dir[0]}_' + '{index}')) + + # recurrent weights + recurrent_weight = self.get_attr(f'{dir}_recurrent_weight_data') + self.add_weights_variable( + name=f'{dir}_recurrent_weight', var_name=(f'wr_{dir[0]}_' + '{index}'), data=recurrent_weight + ) + + # biases + self.add_weights_variable(name=f'{dir}_bias', var_name=(f'b_{dir[0]}_' + '{index}')) + + if self.attributes[f'{dir}_class_name'] == 
'LSTM': + if "pytorch" in self.attributes.keys(): + self.add_weights_variable(name=f'{dir}_recurrent_bias', var_name=(f'br_{dir[0]}_' + '{index}')) + else: + recurrent_bias = np.zeros(recurrent_weight.shape[1]) + self.add_weights_variable( + name=f'{dir}_recurrent_bias', var_name=(f'br_{dir[0]}_' + '{index}'), data=recurrent_bias + ) + else: + self.add_weights_variable( + name=f'{dir}_recurrent_bias', var_name=(f'br_{dir[0]}_' + '{index}'), data=recurrent_bias + ) + + class GarNet(Layer): ref_impl = False @@ -1828,6 +1904,7 @@ def initialize(self): 'GRU': GRU, 'BidirectionalLSTM': BidirectionalLSTM, 'BidirectionalGRU': BidirectionalGRU, + 'Bidirectional': Bidirectional, 'QSimpleRNN': SimpleRNN, 'QLSTM': LSTM, 'QGRU': GRU, diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h index ba988c44e2..785e7fede8 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h @@ -6,6 +6,7 @@ #include "nnet_common.h" #include "nnet_dense.h" #include "nnet_recr_activations.h" +#include namespace nnet { @@ -43,6 +44,25 @@ struct bidirectionallstm_config : lstm_config { typedef float bias_b_t; typedef float recurrent_bias_b_t; }; + +template struct bidirectional_config { + // Layer Sizes + static const unsigned n_in = 2; + static const unsigned n_parts = 20; + static const unsigned n_out = 2; + static const unsigned table_size = 1024; + + // Resource reuse info + static const unsigned io_type = io_parallel; + static const unsigned reuse_factor = 1; + static const unsigned n_zeros = 0; + static const bool store_weights_in_bram = false; + static const bool use_static = true; + + // Layers info + static const RNNForward_config Forward; + static const RNNBackward_config Backward; +}; // Long Short term Memory NN (LSTM) // Resources: // https://github.com/nicodjimenez/lstm/blob/master/lstm.py @@ -160,6 +180,18 @@ void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate nnet::dense(h_state, tmpres_state, param_r, param_br); + /* + std::cout << " tmpres: "; + for (int i = 0; i < CONFIG_T::n_state*4; i++){ + std::cout << " " << tmpres[i]; + } + std::cout << std::endl; + std::cout << " tmpres_state: "; + for (int i = 0; i < CONFIG_T::n_state*4; i++){ + std::cout << " " << tmpres_state[i]; + } + std::cout << std::endl << std::endl; + */ for (int iacc = 0; iacc < (3 * CONFIG_T::n_state); iacc++) { #pragma HLS UNROLL int index = iacc; @@ -172,14 +204,36 @@ void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate int index = iacc + CONFIG_T::n_state * 2; inputacc_c[iacc] = tmpres[index] + tmpres_state[index]; } - + /* + std::cout << " inputacc_ifo: "; + for (int i = 0; i < CONFIG_T::n_state*3; i++){ + std::cout << " " << inputacc_ifo[i]; + } + std::cout << std::endl; + std::cout << " inputacc_c: "; + for (int i = 0; i < CONFIG_T::n_state; i++){ + std::cout << " " << inputacc_c[i]; + } + std::cout << std::endl << std::endl; + */ CONFIG_T::template activation_recr::activation(inputacc_ifo, tmpres_ifo); // Now for the confusion matrix CONFIG_T::template activation::activation(inputacc_c, tmpres_c); - + /* + std::cout << " tmpres_ifo: "; + for (int i = 0; i < CONFIG_T::n_state*3; i++){ + std::cout << " " << tmpres_ifo[i]; + } + std::cout << std::endl; + std::cout << " tmpres_c: "; + for (int i = 0; i < CONFIG_T::n_state; i++){ + std::cout << " " << tmpres_c[i]; + } + std::cout << std::endl << std::endl; + */ // Operation: s=g*i+sold*f (update state 
with buffer to avoid timing issues) for (int iacc = 0; iacc < (CONFIG_T::n_state); iacc++) { #pragma HLS UNROLL @@ -224,6 +278,31 @@ void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate } */ +template struct lstm_struct { + static void apply(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_total[2 * CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4]) { + res_T *h_newstate = h_total; + res_T *s_newstate = h_newstate + CONFIG_T::n_state; + nnet::lstm(reset_state, data, h_newstate, s_newstate, param, param_r, param_b, param_br); + }; +}; + +template struct lstm_struct_static { + static void apply(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_total[2 * CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4]) { + res_T *h_newstate = h_total; + res_T *s_newstate = h_newstate + CONFIG_T::n_state; + nnet::lstm_static(reset_state, data, h_newstate, s_newstate, param, param_r, + param_b, param_br); + }; +}; + template void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], @@ -269,6 +348,140 @@ void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CO } } +template class RNNFunc_Forward, + template class RNNFunc_Backward> +void bidirectional_stack( + data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_out], + typename CONFIG_T::Forward::weight_t param[CONFIG_T::Forward::n_state * CONFIG_T::Forward::n_mult * CONFIG_T::n_in], + typename CONFIG_T::Forward::recurrent_weight_t + param_r[CONFIG_T::Forward::n_state * CONFIG_T::Forward::n_mult * CONFIG_T::Forward::n_state], + typename CONFIG_T::Forward::bias_t param_b[CONFIG_T::Forward::n_state * CONFIG_T::Forward::n_mult], + typename CONFIG_T::Forward::recurrent_bias_t param_br[CONFIG_T::Forward::n_state * CONFIG_T::Forward::n_mult], + typename CONFIG_T::Backward::weight_t + param_back[CONFIG_T::Backward::n_state * CONFIG_T::Backward::n_mult * CONFIG_T::n_in], + typename CONFIG_T::Backward::recurrent_weight_t + param_r_back[CONFIG_T::Backward::n_state * CONFIG_T::Backward::n_mult * CONFIG_T::Backward::n_state], + typename CONFIG_T::Backward::bias_t param_b_back[CONFIG_T::Backward::n_state * CONFIG_T::Backward::n_mult], + typename CONFIG_T::Backward::recurrent_bias_t param_br_back[CONFIG_T::Backward::n_state * CONFIG_T::Backward::n_mult]) { + + res_T h_newstate[(CONFIG_T::Forward::n_mult - 2) * CONFIG_T::Forward::n_state]; + res_T h_newstate_back[(CONFIG_T::Backward::n_mult - 2) * CONFIG_T::Backward::n_state]; + data_T data_in[CONFIG_T::n_in]; + data_T data_in_back[CONFIG_T::n_in]; + bool reset_state = true; + + #pragma HLS ARRAY_PARTITION variable=h_newstate complete + #pragma HLS ARRAY_PARTITION variable=s_newstate complete + #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete + #pragma HLS ARRAY_PARTITION variable=s_newstate_back complete + + for (int ii = 0; ii < 
(CONFIG_T::Forward::n_mult - 2) * CONFIG_T::Forward::n_state; ii++) { + #pragma HLS UNROLL + h_newstate[ii] = 0; + } + for (int ii = 0; ii < (CONFIG_T::Backward::n_mult - 2) * CONFIG_T::Backward::n_state; ii++) { + #pragma HLS UNROLL + h_newstate_back[ii] = 0; + } + + // std::cout << "Data_t size: " << data_T::size << std::endl; + /* + std::cout << " W: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_in; i_w++){ + std::cout << " " << param[i_w]; + } + std::cout << "\n WR: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_state; i_w++){ + std::cout << " " << param_r[i_w]; + } + std::cout << "\n B: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ + std::cout << " " << param_b[i_w]; + } + std::cout << "\n BR: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ + std::cout << " " << param_br[i_w]; + } + std::cout << "\n BW: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_in; i_w++){ + std::cout << " " << param_back[i_w]; + } + std::cout << "\n W_B: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_state; i_w++){ + std::cout << " " << param_r_back[i_w]; + } + std::cout << "\n B_B: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ + std::cout << " " << param_b_back[i_w]; + } + std::cout << "\n BR_B:" << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ + std::cout << " " << param_br_back[i_w]; + } + std::cout << std::endl << std::endl; + + std::cout << " States:" << std::endl << " "; + + std::cout << " " << 0 <<":"; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate[k]; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate[k]; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate_back[k] ; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate_back[k]; + std::cout << std::endl << std::endl; + */ + for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { + for (int j = 0; j < CONFIG_T::n_in; j++) { + #pragma HLS UNROLL + data_in[j] = data[j + iloop * CONFIG_T::n_in]; + data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; + } + RNNFunc_Forward::apply(reset_state, data_in, h_newstate, param, param_r, param_b, + param_br); + RNNFunc_Backward::apply(reset_state, data_in_back, h_newstate_back, param_back, + param_r_back, param_b_back, param_br_back); + /* + std::cout << " " << iloop+1 <<":"; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate[k]; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate[k]; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate_back[k] ; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate_back[k]; + std::cout << std::endl << std::endl; + */ + if (CONFIG_T::n_sequence_out > 1) { + for (int i = (CONFIG_T::Forward::n_state + CONFIG_T::Backward::n_state) * iloop, j = 0; + i < (CONFIG_T::Forward::n_state + CONFIG_T::Backward::n_state) * iloop + CONFIG_T::Forward::n_state; + i++, j++) { + #pragma HLS UNROLL + res[i] = h_newstate[j]; + } + for (int i = (CONFIG_T::Forward::n_state + CONFIG_T::Backward::n_state) * iloop + 
CONFIG_T::Forward::n_state, + j = 0; + i < (CONFIG_T::Forward::n_state + CONFIG_T::Backward::n_state) * (iloop + 1); i++, j++) { + #pragma HLS UNROLL + res[i] = h_newstate_back[j]; + } + } + reset_state = false; + } + if (CONFIG_T::n_sequence_out == 1) { + for (int i = 0; i < (CONFIG_T::Forward::n_state); i++) { + #pragma HLS UNROLL + res[i] = h_newstate[i]; + } + for (int i = 0; i < (CONFIG_T::Backward::n_state); i++) { + #pragma HLS UNROLL + res[i + CONFIG_T::Forward::n_state] = h_newstate_back[i]; + } + } +} + template void bidirectionallstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], @@ -301,6 +514,55 @@ void bidirectionallstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_newstate_back[ii] = 0; s_newstate_back[ii] = 0; } + + // std::cout << "Data_t size: " << data_T::size << std::endl; + /* + std::cout << " W: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_in; i_w++){ + std::cout << " " << param[i_w]; + } + std::cout << "\n WR: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_state; i_w++){ + std::cout << " " << param_r[i_w]; + } + std::cout << "\n B: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ + std::cout << " " << param_b[i_w]; + } + std::cout << "\n BR: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ + std::cout << " " << param_br[i_w]; + } + std::cout << "\n BW: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_in; i_w++){ + std::cout << " " << param_back[i_w]; + } + std::cout << "\n W_B: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_state; i_w++){ + std::cout << " " << param_r_back[i_w]; + } + std::cout << "\n B_B: " << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ + std::cout << " " << param_b_back[i_w]; + } + std::cout << "\n BR_B:" << std::endl << " "; + for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ + std::cout << " " << param_br_back[i_w]; + } + std::cout << std::endl << std::endl; + + std::cout << " States:" << std::endl << " "; + + std::cout << " " << 0 <<":"; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate[k]; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate[k]; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate_back[k] ; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate_back[k]; + std::cout << std::endl << std::endl; + */ for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { for (int j = 0; j < CONFIG_T::n_in; j++) { #pragma HLS UNROLL @@ -318,6 +580,17 @@ void bidirectionallstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], nnet::lstm(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, param_r_back, param_b_back, param_br_back); } + /* + std::cout << " " << iloop+1 <<":"; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate[k]; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate[k]; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate_back[k] ; + std::cout << std::endl << " "; + for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate_back[k]; + std::cout << 
std::endl << std::endl; + */ if (CONFIG_T::n_sequence_out > 1) { for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { #pragma HLS UNROLL @@ -392,6 +665,10 @@ void bidirectionallstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], data_T data_in_back[CONFIG_T::n_in]; bool reset_state = false; + std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl << std::endl; + std::cout << "Data_t size: " << data_T::size << std::endl; + std::cout << std::endl << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl << std::endl; + #pragma HLS ARRAY_PARTITION variable=h_newstate complete #pragma HLS ARRAY_PARTITION variable=s_newstate complete #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete @@ -448,6 +725,10 @@ void lstm_stack(hls::stream &data_stream, hls::stream &res_stream typename data_T::value_type data_in[CONFIG_T::n_in]; bool reset_state = true; + std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl << std::endl; + std::cout << "Data_t size: " << data_T::size << std::endl; + std::cout << std::endl << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl << std::endl; + DataPropagation: for (int i_in = 0; i_in < CONFIG_T::n_sequence * CONFIG_T::n_in / data_T::size; i_in++) { if (CONFIG_T::n_sequence * CONFIG_T::n_in / data_T::size > 1) { @@ -781,6 +1062,26 @@ void gru_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[ } */ +template struct gru_struct { + static void apply(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_state[CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3]) { + nnet::gru(reset_state, data, h_state, param, param_zr, param_b, param_br); + }; +}; + +template struct gru_struct_static { + static void apply(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_state[CONFIG_T::n_state], + typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], + typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 3 * CONFIG_T::n_state], + typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], + typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3]) { + nnet::gru_static(reset_state, data, h_state, param, param_zr, param_b, param_br); + }; +}; + template void gru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], From 4ed22c96b9c02f868cccb49dc475ddcbebabff6f Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Wed, 11 Jun 2025 11:52:15 +0200 Subject: [PATCH 14/26] ADD Bidirectional layers support --- hls4ml/backends/fpga/fpga_backend.py | 31 +- .../vivado/passes/recurrent_templates.py | 282 ++++--- .../vivado/passes/resource_strategy.py | 16 +- hls4ml/backends/vivado/vivado_backend.py | 62 +- hls4ml/converters/keras_v2_to_hls.py | 2 +- hls4ml/model/layers.py | 68 +- .../model/optimizer/passes/infer_precision.py | 7 +- .../vivado/nnet_utils/nnet_recurrent.h | 725 ++++-------------- 8 files changed, 380 insertions(+), 813 deletions(-) diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index 2bdc90baa3..a17e91e2fd 100644 --- 
a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -14,8 +14,7 @@ Activation, BatchNormalization, BatchNormOnnx, - BidirectionalGRU, - BidirectionalLSTM, + Bidirectional, Conv, Conv1D, Conv2D, @@ -70,8 +69,7 @@ def __init__(self, name): SimpleRNN, LSTM, GRU, - BidirectionalLSTM, - BidirectionalGRU, + Bidirectional, Dot, Conv, MatMul, @@ -217,13 +215,6 @@ def get_layer_mult_size(self, layer): n_out = layer.get_attr('n_filt') return n_in, n_out - if 'BidirectionalLSTM' in layer.class_name: - n_in = layer.get_attr('n_in') - n_out = layer.get_attr('n_out') * 2 # /2*4 - n_in_recr = layer.get_attr('n_out') // 2 - n_out_recr = n_out - return n_in, n_out, n_in_recr, n_out_recr - if 'LSTM' in layer.class_name: n_in = layer.get_attr('n_in') n_out = layer.get_attr('n_out') * 4 @@ -231,13 +222,6 @@ def get_layer_mult_size(self, layer): n_out_recr = n_out return n_in, n_out, n_in_recr, n_out_recr - if 'BidirectionalGRU' in layer.class_name: - n_in = layer.get_attr('n_in') - n_out = layer.get_attr('n_out') // 2 * 3 - n_in_recr = layer.get_attr('n_out') // 2 - n_out_recr = n_out - return n_in, n_out, n_in_recr, n_out_recr - if 'GRU' in layer.class_name: n_in = layer.get_attr('n_in') n_out = layer.get_attr('n_out') * 3 @@ -245,6 +229,16 @@ def get_layer_mult_size(self, layer): n_out_recr = n_out return n_in, n_out, n_in_recr, n_out_recr + if 'Bidirectional' in layer.class_name: + result = [] + for d in ['forward', 'backward']: + n_in = layer.get_attr('n_in') + n_out = layer.get_attr(f'{d}_n_states') * 3 + n_in_recr = layer.get_attr(f'{d}_n_states') + n_out_recr = n_out + result.append((n_in, n_out, n_in_recr, n_out_recr)) + return result + raise Exception(f'Cannot get mult size for layer {layer.name} ({layer.class_name})') def get_valid_reuse_factors(self, n_in, n_out): @@ -295,6 +289,7 @@ def set_closest_reuse_factor(self, layer, n_in, n_out, attribute='reuse_factor', if not include_max_rf: valid_rf.pop() chosen_rf = layer.get_attr(attribute) + print("\n\nREuse factor:", chosen_rf, "\n\n") if chosen_rf not in valid_rf: closest_rf = self.get_closest_reuse_factor(valid_rf, chosen_rf) valid_rf_str = ','.join(map(str, valid_rf)) diff --git a/hls4ml/backends/vivado/passes/recurrent_templates.py b/hls4ml/backends/vivado/passes/recurrent_templates.py index 33fbb5406e..1542500d62 100644 --- a/hls4ml/backends/vivado/passes/recurrent_templates.py +++ b/hls4ml/backends/vivado/passes/recurrent_templates.py @@ -1,6 +1,6 @@ from hls4ml.backends.backend import get_backend from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate -from hls4ml.model.layers import GRU, LSTM, BidirectionalLSTM, BidirectionalGRU, TimeDistributed +from hls4ml.model.layers import GRU, LSTM, Bidirectional, TimeDistributed # recurrent multiplication template @@ -86,16 +86,14 @@ static const bool pytorch_order = {pytorch}; }};\n""" -bidir_recr_config_template = """struct config{index} : nnet::{recr_type}_config {{ +# Bidirectional templates + +single_config_template = """struct config{index} : nnet::single_layer_config {{ typedef {accum_t.name} accum_t; typedef {weight_t.name} weight_t; // Matrix typedef {recurrent_weight_t.name} recurrent_weight_t; // Matrix typedef {bias_t.name} bias_t; // Vector typedef {recurrent_bias_t.name} recurrent_bias_t; // Vector - typedef {weight_b_t.name} weight_b_t; // Matrix - typedef {recurrent_weight_b_t.name} recurrent_weight_b_t; // Matrix - typedef {bias_b_t.name} bias_b_t; // Vector - typedef {recurrent_bias_b_t.name} recurrent_bias_b_t; // Vector 
typedef {config_mult_t1} mult_config1; typedef {config_mult_t2} mult_config2; typedef {recr_act_t} ACT_CONFIG_{RECR_TYPE}; @@ -105,8 +103,20 @@ template using activation = nnet::activation::{activation}; static const unsigned n_in = {n_in}; - static const unsigned n_out = {n_out}; static const unsigned n_state = {n_state}; + static const unsigned n_mult = {n_mult}; + static const bool pytorch_order = {pytorch}; +}};\n""" + +bidirectional_config_template = """struct config{index} : nnet::bidirectional_config {{ + typedef {forward_t} FORWARD_CONFIG; + template + using RNNfunc_forward = nnet::{forward_layer}; + typedef {backward_t} BACKWARD_CONFIG; + template + using RNNfunc_backward = nnet::{backward_layer}; + static const unsigned n_in = {n_in}; + static const unsigned n_out = {n_out}; static const unsigned n_sequence = {n_sequence}; static const unsigned n_sequence_out = {n_sequence_out}; static const unsigned io_type = nnet::{strategy}; @@ -120,9 +130,7 @@ recr_function_template_initial_states_lstm = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {input3_t}, {output_t}, {config}>({input}, {input2}, {input3}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 recr_function_template_initial_states_gru = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 -recr_bidir_function_template = 'nnet::{recr_type}_stack<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br}, {w_b}, {wr_b}, {b_b}, {br_b});' # noqa: E501 -recr_bidir_function_template_initial_states_lstm = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {input3_t}, {output_t}, {config}>({input}, {input2}, {input3}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 -recr_bidir_function_template_initial_states_gru = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});' # noqa: E501 +bidirectional_function_template = 'nnet::bidirectional_stack<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br}, {w_b}, {wr_b}, {b_b}, {br_b});' # noqa: E501 recr_include_list = ['nnet_utils/nnet_recurrent.h'] @@ -241,118 +249,151 @@ def format(self, node): return mult_config1 + '\n' + mult_config2 + '\n' + recr_act_config + '\n' + act_config + '\n' + recr_config -class BidirectionalRecurrentConfigTemplate(LayerConfigTemplate): +class BidirectionalConfigTemplate(LayerConfigTemplate): def __init__(self): - super().__init__((BidirectionalLSTM, BidirectionalGRU)) - self.template = bidir_recr_config_template + super().__init__(Bidirectional) + self.template = bidirectional_config_template + self.layer_template = single_config_template self.act_template = activ_config_template self.recr_act_template = recr_activ_config_template self.mult1_template = recr_mult_config_template_1 self.mult2_template = recr_mult_config_template_2 def format(self, node): + + # ----- Bidirectional Layer Config -----# params = self._default_config_params(node) params['n_in'] = node.get_input_variable().dim_names[1] params['n_sequence'] = node.get_input_variable().dim_names[0] if node.get_attr('return_sequences'): params['n_sequence_out'] = node.get_output_variable().dim_names[0] - params['n_state'] = f'{node.get_output_variable().dim_names[1]} / 2' - params['n_out'] = node.get_output_variable().dim_names[1] else: params['n_sequence_out'] = 1 - params['n_state'] = f'{node.get_output_variable().dim_names[0]} / 2' - params['n_out'] = node.get_output_variable().dim_names[0] - params['config_mult_t1'] = 
f'config{node.index}_1' - params['config_mult_t2'] = f'config{node.index}_2' - params['recr_act_t'] = '{}_config{}_recr'.format(node.get_attr('recurrent_activation'), node.index) - params['act_t'] = '{}_config{}'.format(node.get_attr('activation'), node.index) + params['n_out'] = node.get_attr('n_out') params['strategy'] = node.get_attr('strategy') params['static'] = 'true' if node.attributes['static'] else 'false' params['pytorch'] = 'true' if node.get_attr('pytorch', False) else 'false' - params['recr_type'] = node.class_name.lower() - params['RECR_TYPE'] = node.class_name[13:] - - if node.class_name == 'BidirectionalLSTM': - n_recr_mult = 4 - else: # BidirectionalGRU - n_recr_mult = 3 + params['forward_t'] = f'config{node.index}_forward' + params['backward_t'] = f'config{node.index}_backward' + params['forward_layer'] = node.get_attr('forward_class_name').lower() + '_class' + params['backward_layer'] = node.get_attr('backward_class_name').lower() + '_class' + if node.attributes['static']: + params['forward_layer'] += '_static' + params['backward_layer'] += '_static' recr_config = self.template.format(**params) - act_params = self._default_config_params(node) - recr_act_params = self._default_config_params(node) - - act_params['type'] = node.get_attr('activation') - recr_act_params['type'] = node.get_attr('recurrent_activation') - if node.get_attr('return_sequences'): - act_params['n_in'] = node.get_output_variable().shape[1] - recr_act_params['n_in'] = node.get_output_variable().shape[1] * (n_recr_mult - 1) - else: - act_params['n_in'] = node.get_output_variable().shape[0] - recr_act_params['n_in'] = node.get_output_variable().shape[0] * (n_recr_mult - 1) - - act_config = self.act_template.format(**act_params) - recr_act_config = self.recr_act_template.format(**recr_act_params) - - mult_params1 = self._default_config_params(node) - mult_params2 = self._default_config_params(node) - - mult_params1['n_in'] = node.get_input_variable().shape[1] - if node.get_attr('return_sequences'): - mult_params1['n_out'] = node.get_output_variable().shape[1] // 2 * n_recr_mult - else: - mult_params1['n_out'] = node.get_output_variable().shape[0] // 2 * n_recr_mult - mult_params1['product_type'] = get_backend('vivado').product_type( - node.get_input_variable().type.precision, node.get_weights('weight').type.precision - ) - mult_params1['reuse'] = params['reuse'] - mult_params1['index'] = str(node.index) + '_1' - mult_params1['nzeros'] = node.get_weights('weight').nzeros - mult_params1['nonzeros'] = node.get_weights('weight').nonzeros - - namespace = params['namespace'] - - if node.get_attr('strategy').lower() == 'latency': - mult_params1['dense_function'] = 'nnet::DenseLatency' - elif node.get_attr('strategy').lower() == 'resource': - if int(mult_params1['reuse_factor']) <= int(mult_params1['n_in']): - mult_params1['dense_function'] = 'nnet::DenseResource_rf_leq_nin' - else: - mult_params1['dense_function'] = 'nnet::DenseResource_rf_gt_nin_rem0' - # The 3rd case is never used - elif node.get_attr('strategy').lower() == 'resource_unrolled': - mult_params1['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}_1' - - if node.get_attr('return_sequences'): - mult_params2['n_in'] = node.get_output_variable().shape[1] // 2 - mult_params2['n_out'] = node.get_output_variable().shape[1] // 2 * n_recr_mult - else: - mult_params2['n_in'] = node.get_output_variable().shape[0] // 2 - mult_params2['n_out'] = node.get_output_variable().shape[0] // 2 * n_recr_mult - mult_params2['product_type'] = 
get_backend('vivado').product_type( - node.get_input_variable().type.precision, node.get_weights('recurrent_weight').type.precision - ) - mult_params2['reuse'] = node.attributes['recurrent_reuse_factor'] - mult_params2['index'] = str(node.index) + '_2' - mult_params2['nzeros'] = node.get_weights('recurrent_weight').nzeros - mult_params2['nonzeros'] = node.get_weights('recurrent_weight').nonzeros - - if node.get_attr('strategy').lower() == 'latency': - mult_params2['dense_function'] = 'nnet::DenseLatency' - elif node.get_attr('strategy').lower() == 'resource': - if int(mult_params2['reuse_factor']) <= int(mult_params2['n_in']): - mult_params2['dense_function'] = 'nnet::DenseResource_rf_leq_nin' - else: - mult_params2['dense_function'] = 'nnet::DenseResource_rf_gt_nin_rem0' - # The 3rd case is never used - elif node.get_attr('strategy').lower() == 'resource_unrolled': - mult_params2['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}_2' - - mult_config1 = self.mult1_template.format(**mult_params1) - mult_config2 = self.mult2_template.format(**mult_params2) - - return mult_config1 + '\n' + mult_config2 + '\n' + recr_act_config + '\n' + act_config + '\n' + recr_config + # ----- Forward and Backward Layers Config -----# + result = '' + for d in ['forward', 'backward']: + if node.get_attr(f'{d}_class_name') == 'LSTM': + n_recr_mult = 4 + else: # GRU + n_recr_mult = 3 + + # ----- Layer Config -----# + layer_params = self._default_config_params(node) + layer_params['n_in'] = params['n_in'] + layer_params['pytorch'] = params['pytorch'] + layer_params['n_state'] = node.get_attr(f'{d}_n_states') + layer_params['n_mult'] = 4 + if node.get_attr(f'{d}_class_name').lower() == 'gru': + layer_params['n_mult'] = 3 + layer_params['config_mult_t1'] = f'config{node.index}_1_{d[0]}' + layer_params['config_mult_t2'] = f'config{node.index}_2_{d[0]}' + layer_params['recr_act_t'] = '{}_config{}_recr'.format( + node.get_attr(f'{d}_recurrent_activation'), str(node.index) + f'_{d[0]}' + ) + layer_params['act_t'] = '{}_config{}'.format(node.get_attr(f'{d}_activation'), str(node.index) + f'_{d[0]}') + layer_params['RECR_TYPE'] = node.get_attr(f'{d}_class_name') + + layer_params['weight_t'] = layer_params[f'{d}_weight_t'] + layer_params['recurrent_weight_t'] = layer_params[f'{d}_recurrent_weight_t'] + layer_params['bias_t'] = layer_params[f'{d}_bias_t'] + layer_params['recurrent_bias_t'] = layer_params[f'{d}_recurrent_bias_t'] + layer_params['activation'] = layer_params[f'{d}_activation'] + layer_params['recurrent_activation'] = layer_params[f'{d}_recurrent_activation'] + + layer_params['index'] = str(node.index) + f'_{d}' + + layer_config = self.layer_template.format(**layer_params) + + # ----- Activations Config -----# + act_params = self._default_config_params(node) + recr_act_params = self._default_config_params(node) + + act_params['type'] = node.get_attr(f'{d}_activation') + recr_act_params['type'] = node.get_attr(f'{d}_recurrent_activation') + act_params['index'] = str(node.index) + f'_{d[0]}' + recr_act_params['index'] = str(node.index) + f'_{d[0]}' + act_params['n_in'] = node.get_attr(f'{d}_n_states') + recr_act_params['n_in'] = node.get_attr(f'{d}_n_states') * (n_recr_mult - 1) + + act_config = self.act_template.format(**act_params) + recr_act_config = self.recr_act_template.format(**recr_act_params) + + # ----- Mult Config -----# + mult_params1 = self._default_config_params(node) + mult_params2 = self._default_config_params(node) + + mult_params1['n_in'] = node.get_input_variable().shape[1] 
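+            # Descriptive note (not in the original patch): n_recr_mult is the number of stacked gate
+            # blocks per state set just above (4 for LSTM, 3 for GRU), so mult_config1 sizes the
+            # input-kernel multiply from n_in features to n_states * n_recr_mult accumulators per direction.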
+ mult_params1['n_out'] = node.get_attr(f'{d}_n_states') * n_recr_mult + mult_params1['product_type'] = get_backend('vivado').product_type( + node.get_input_variable().type.precision, node.get_weights(f'{d}_weight').type.precision + ) + mult_params1['reuse'] = params['reuse'] + mult_params1['index'] = str(node.index) + f'_1_{d[0]}' + mult_params1['nzeros'] = node.get_weights(f'{d}_weight').nzeros + mult_params1['nonzeros'] = node.get_weights(f'{d}_weight').nonzeros + + mult_params1['bias_t'] = mult_params1[f'{d}_bias_t'] + mult_params1['weight_t'] = mult_params1[f'{d}_weight_t'] + mult_params2['recurrent_bias_t'] = mult_params2[f'{d}_recurrent_bias_t'] + mult_params2['recurrent_weight_t'] = mult_params2[f'{d}_recurrent_weight_t'] + + namespace = params['namespace'] + + if node.get_attr('strategy').lower() == 'latency': + mult_params1['dense_function'] = 'nnet::DenseLatency' + elif node.get_attr('strategy').lower() == 'resource': + if int(mult_params1[f'{d}_reuse_factor']) <= int(mult_params1['n_in']): + mult_params1['dense_function'] = 'nnet::DenseResource_rf_leq_nin' + else: + mult_params1['dense_function'] = 'nnet::DenseResource_rf_gt_nin_rem0' + # The 3rd case is never used + elif node.get_attr('strategy').lower() == 'resource_unrolled': + mult_params1['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}_1' + + mult_params2['n_in'] = node.get_attr(f'{d}_n_states') + mult_params2['n_out'] = node.get_attr(f'{d}_n_states') * n_recr_mult + mult_params2['product_type'] = get_backend('vivado').product_type( + node.get_input_variable().type.precision, node.get_weights(f'{d}_recurrent_weight').type.precision + ) + mult_params2['reuse'] = node.attributes[f'{d}_recurrent_reuse_factor'] + mult_params2['index'] = str(node.index) + f'_2_{d[0]}' + mult_params2['nzeros'] = node.get_weights(f'{d}_recurrent_weight').nzeros + mult_params2['nonzeros'] = node.get_weights(f'{d}_recurrent_weight').nonzeros + + if node.get_attr('strategy').lower() == 'latency': + mult_params2['dense_function'] = 'nnet::DenseLatency' + elif node.get_attr('strategy').lower() == 'resource': + if int(mult_params2[f'{d}_reuse_factor']) <= int(mult_params2['n_in']): + mult_params2['dense_function'] = 'nnet::DenseResource_rf_leq_nin' + else: + mult_params2['dense_function'] = 'nnet::DenseResource_rf_gt_nin_rem0' + # The 3rd case is never used + elif node.get_attr('strategy').lower() == 'resource_unrolled': + mult_params2['dense_function'] = f'{namespace}::dense_resource_unrolled_{node.index}_2' + + mult_config1 = self.mult1_template.format(**mult_params1) + mult_config2 = self.mult2_template.format(**mult_params2) + + result += ( + mult_config1 + '\n' + mult_config2 + '\n' + recr_act_config + '\n' + act_config + '\n' + layer_config + '\n' + ) + + return result + recr_config class RecurrentFunctionTemplate(FunctionCallTemplate): @@ -452,37 +493,32 @@ def format(self, node): else: return self.template_end.format(**params) -class BidirectionalRecurrentFunctionTemplate(FunctionCallTemplate): +class BidirectionalFunctionTemplate(FunctionCallTemplate): def __init__(self): - super().__init__((BidirectionalLSTM, BidirectionalGRU), include_header=recr_include_list) + super().__init__((Bidirectional), include_header=recr_include_list) def format(self, node): params = self._default_function_params(node) + + # TO DO: Add initial tates functions + ''' if params['pass_initial_states'] == 'true': params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name params['input2'] = node.get_input_variable(node.inputs[1]).name 
if node.class_name == 'BLSTM': params['input3'] = node.get_input_variable(node.inputs[2]).name params['input3_t'] = node.get_input_variable(node.inputs[2]).type.name - - params['w'] = node.get_weights('weight').name - params['b'] = node.get_weights('bias').name - params['wr'] = node.get_weights('recurrent_weight').name - params['br'] = node.get_weights('recurrent_bias').name - params['w_b'] = node.get_weights('weight_b').name - params['b_b'] = node.get_weights('bias_b').name - params['wr_b'] = node.get_weights('recurrent_weight_b').name - params['br_b'] = node.get_weights('recurrent_bias_b').name - params['activation'] = node.get_attr('activation') - params['recurrent_activation'] = node.get_attr('recurrent_activation') - params['recr_type'] = node.class_name.lower() - - if params['pass_initial_states'] == 'true': - if node.class_name == 'BLSTM': - template = recr_bidir_function_template_initial_states_lstm - else: - template = recr_bidir_function_template_initial_states_gru - else: - template = recr_bidir_function_template + ''' + + params['w'] = node.get_weights('forward_weight').name + params['b'] = node.get_weights('forward_bias').name + params['wr'] = node.get_weights('forward_recurrent_weight').name + params['br'] = node.get_weights('forward_recurrent_bias').name + params['w_b'] = node.get_weights('backward_weight').name + params['b_b'] = node.get_weights('backward_bias').name + params['wr_b'] = node.get_weights('backward_recurrent_weight').name + params['br_b'] = node.get_weights('backward_recurrent_bias').name + + template = bidirectional_function_template return template.format(**params) diff --git a/hls4ml/backends/vivado/passes/resource_strategy.py b/hls4ml/backends/vivado/passes/resource_strategy.py index bbd030c786..407adc9d03 100644 --- a/hls4ml/backends/vivado/passes/resource_strategy.py +++ b/hls4ml/backends/vivado/passes/resource_strategy.py @@ -3,8 +3,7 @@ from hls4ml.model.layers import ( GRU, LSTM, - BidirectionalGRU, - BidirectionalLSTM, + Bidirectional, Conv1D, Conv2D, Dense, @@ -18,9 +17,7 @@ class ApplyResourceStrategy(OptimizerPass): '''Transposes the weights to use the dense_resource matrix multiply routine''' def match(self, node): - node_matches = isinstance( - node, (Dense, Conv1D, SeparableConv1D, Conv2D, SeparableConv2D, LSTM, GRU, BidirectionalLSTM, BidirectionalGRU) - ) + node_matches = isinstance(node, (Dense, Conv1D, SeparableConv1D, Conv2D, SeparableConv2D, LSTM, GRU, Bidirectional)) is_resource_strategy = node.get_attr('strategy', '').lower() in ['resource', 'resource_unrolled'] already_transformed = node.get_attr('_weights_transposed', False) is True return node_matches and is_resource_strategy and not already_transformed @@ -48,11 +45,10 @@ def transform(self, model, node): node.weights['pointwise'].data = np.transpose( node.weights['pointwise'].data, axes=[3, 0, 1, 2] ) # (H,W,C,F) => (F,H,W,C) - elif isinstance(node, (BidirectionalLSTM, BidirectionalGRU)): - node.weights['weight'].data = np.transpose(node.weights['weight'].data) - node.weights['recurrent_weight'].data = np.transpose(node.weights['recurrent_weight'].data) - node.weights['weight_b'].data = np.transpose(node.weights['weight_b'].data) - node.weights['recurrent_weight_b'].data = np.transpose(node.weights['recurrent_weight_b'].data) + elif isinstance(node, (Bidirectional)): + for d in ['forward', 'backward']: + node.weights[f'{d}_weight'].data = np.transpose(node.weights[f'{d}_weight'].data) + node.weights[f'{d}_recurrent_weight'].data = 
np.transpose(node.weights[f'{d}_recurrent_weight'].data) elif isinstance(node, (LSTM, GRU)): node.weights['weight'].data = np.transpose(node.weights['weight'].data) node.weights['recurrent_weight'].data = np.transpose(node.weights['recurrent_weight'].data) diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index a26a1a89c3..e9d2a80a2e 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -12,8 +12,7 @@ from hls4ml.model.layers import ( GRU, LSTM, - BidirectionalGRU, - BidirectionalLSTM, + Bidirectional, Conv1D, Conv2D, Dense, @@ -48,7 +47,7 @@ def __init__(self): def _register_layer_attributes(self): # Add RNN-specific attributes, recurrent_reuse_factor and static implementation - rnn_layers = [SimpleRNN, LSTM, GRU, BidirectionalLSTM, BidirectionalGRU] + rnn_layers = [SimpleRNN, LSTM, GRU] for layer in rnn_layers: attrs = self.attribute_map.get(layer, []) @@ -60,6 +59,24 @@ def _register_layer_attributes(self): attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8), description=descriptions.table_type)) self.attribute_map[layer] = attrs + bidir_rnn_layers = [Bidirectional] + for layer in bidir_rnn_layers: + attrs = self.attribute_map.get(layer, []) + attrs.append(ConfigurableAttribute('forward_reuse_factor', default=1, description=descriptions.reuse_factor)) + attrs.append(ConfigurableAttribute('backward_reuse_factor', default=1, description=descriptions.reuse_factor)) + attrs.append( + ConfigurableAttribute('forward_recurrent_reuse_factor', default=1, description=descriptions.reuse_factor) + ) + attrs.append( + ConfigurableAttribute('backward_recurrent_reuse_factor', default=1, description=descriptions.reuse_factor) + ) + attrs.append( + ConfigurableAttribute('static', value_type=bool, default=True, description=descriptions.recurrent_static) + ) + attrs.append(ConfigurableAttribute('table_size', default=1024, description=descriptions.table_size)) + attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8), description=descriptions.table_type)) + self.attribute_map[layer] = attrs + # Add ParallelizationFactor to Conv1D/2D pf_layers = [ Conv1D, @@ -657,6 +674,45 @@ def init_time_distributed(self, layer): warn(f'Cannot unroll time step loop in layer "{layer.name}" while using "io_stream".') loop_mode = 'off' layer.set_attr('time_step_loop_parallelism', loop_mode) + + @layer_optimizer(Bidirectional) + def init_bidirectional(self, layer): + reuse_factor = layer.model.config.get_reuse_factor(layer) + + for i, d in enumerate(['forward', 'backward']): + layer.set_attr(f'{d}_reuse_factor', reuse_factor) + layer.set_attr(f'{d}_recurrent_reuse_factor', reuse_factor) + + if layer.model.config.is_resource_strategy(layer): + n_in, n_out, n_in_recr, n_out_recr = self.get_layer_mult_size(layer)[i] + self.set_closest_reuse_factor(layer, n_in, n_out, attribute=f'{d}_reuse_factor') + self.set_closest_reuse_factor(layer, n_in_recr, n_out_recr, attribute=f'{d}_recurrent_reuse_factor') + layer.set_attr('strategy', 'resource') + + elif layer.model.config.get_strategy(layer).lower() == 'resource_unrolled': + use_resource_instead = False + if layer.get_attr('reuse_factor', 1) == 1: + print( + f'Unrolled resource strategy cannot be combined with reuse factor 1 in layer "{layer.name} ({d})". ' + 'Using "resource" strategy instead.' 
+ ) + use_resource_instead = True + + n_in, n_out, n_in_recr, n_out_recr = self.get_layer_mult_size(layer)[i] + if use_resource_instead: + self.set_closest_reuse_factor(layer, n_in, n_out, attribute=f'{d}_reuse_factor') + self.set_closest_reuse_factor(layer, n_in_recr, n_out_recr, attribute=f'{d}_recurrent_reuse_factor') + layer.set_attr('strategy', 'resource') + else: + self.set_closest_reuse_factor(layer, n_in, n_out, attribute=f'{d}_reuse_factor', include_max_rf=False) + self.set_closest_reuse_factor( + layer, n_in_recr, n_out_recr, attribute=f'{d}_recurrent_reuse_factor', include_max_rf=False + ) + layer.set_attr('strategy', 'resource_unrolled') + else: + layer.set_attr('strategy', 'latency') + + layer.set_attr('index_t', NamedType(f'layer{layer.index}_index', IntegerPrecisionType(width=1, signed=False))) @layer_optimizer(GarNet) def init_garnet(self, layer): diff --git a/hls4ml/converters/keras_v2_to_hls.py b/hls4ml/converters/keras_v2_to_hls.py index 14e68c7713..6334eccd92 100644 --- a/hls4ml/converters/keras_v2_to_hls.py +++ b/hls4ml/converters/keras_v2_to_hls.py @@ -241,7 +241,7 @@ def parse_keras_model(model_arch, reader): 'HGQ>UnaryLUT', ] # Recurrent layers - recurrent_layers = ['SimpleRNN', 'LSTM', 'GRU', 'QSimpleRNN', 'QLSTM', 'QGRU', 'BidirectionalLSTM', 'BidirectionalGRU'] + recurrent_layers = ['SimpleRNN', 'LSTM', 'GRU', 'QSimpleRNN', 'QLSTM', 'QGRU', 'Bidirectional'] # All supported layers supported_layers = get_supported_keras_layers() + skip_layers diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index f24a5bf273..d337bed68f 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1402,40 +1402,6 @@ def initialize(self): self.add_weights_variable(name='recurrent_bias', var_name='br{index}', data=recurrent_bias) -class BidirectionalLSTM(LSTM): - _expected_attributes = [ - WeightAttribute('weight_b'), - WeightAttribute('bias_b'), - WeightAttribute('recurrent_weight_b'), - WeightAttribute('recurrent_bias_b'), - TypeAttribute('weight_b'), - TypeAttribute('bias_b'), - TypeAttribute('recurrent_weight_b'), - TypeAttribute('recurrent_bias_b'), - ChoiceAttribute('merge_mode', ['sum', 'mul', 'concat', 'ave'], configurable=False, default='concat'), - ] - - def initialize(self): - super().initialize() - - # Add backward layer parameters - # weights - self.add_weights_variable(name='weight_b', var_name='w_b{index}') - - # recurrent weights - self.add_weights_variable(name='recurrent_weight_b', var_name='wr_b{index}') - - # biases - self.add_weights_variable(name='bias_b', var_name='b_b{index}') - - if "pytorch" in self.attributes.keys(): - self.add_weights_variable(name='recurrent_bias_b', var_name='br_b{index}') - else: - recurrent_weight_b = self.get_attr('recurrent_weight_b_data') - recurrent_bias_b = np.zeros(recurrent_weight_b.shape[1]) - self.add_weights_variable(name='recurrent_bias_b', var_name='br_b{index}', data=recurrent_bias_b) - - class GRU(Layer): _expected_attributes = [ Attribute('n_out'), @@ -1509,34 +1475,6 @@ def initialize(self): self.add_output_variable(shape, dims) - -class BidirectionalGRU(GRU): - _expected_attributes = [ - WeightAttribute('weight_b'), - WeightAttribute('bias_b'), - WeightAttribute('recurrent_weight_b'), - WeightAttribute('recurrent_bias_b'), - TypeAttribute('weight_b'), - TypeAttribute('bias_b'), - TypeAttribute('recurrent_weight_b'), - TypeAttribute('recurrent_bias_b'), - ChoiceAttribute('merge_mode', ['sum', 'mul', 'concat', 'ave'], configurable=False, default='concat'), - ] - - def initialize(self): - 
super().initialize() - - # Add backward layer parameters - # weights - self.add_weights_variable(name='weight_b', var_name='w_b{index}') - - # recurrent weights - self.add_weights_variable(name='recurrent_weight_b', var_name='wr_b{index}') - - # biases - self.add_weights_variable(name='bias_b', var_name='b_b{index}') - self.add_weights_variable(name='recurrent_bias_b', var_name='br_b{index}') - class Bidirectional(Layer): _expected_attributes = [ @@ -1609,9 +1547,7 @@ def initialize(self): name=f'{dir}_recurrent_bias', var_name=(f'br_{dir[0]}_' + '{index}'), data=recurrent_bias ) else: - self.add_weights_variable( - name=f'{dir}_recurrent_bias', var_name=(f'br_{dir[0]}_' + '{index}'), data=recurrent_bias - ) + self.add_weights_variable(name=f'{dir}_recurrent_bias', var_name=(f'br_{dir[0]}_' + '{index}')) class GarNet(Layer): @@ -1902,8 +1838,6 @@ def initialize(self): 'SimpleRNN': SimpleRNN, 'LSTM': LSTM, 'GRU': GRU, - 'BidirectionalLSTM': BidirectionalLSTM, - 'BidirectionalGRU': BidirectionalGRU, 'Bidirectional': Bidirectional, 'QSimpleRNN': SimpleRNN, 'QLSTM': LSTM, diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 21c05c98f6..c048be99d4 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -81,7 +81,7 @@ def _infer_precision(self, node, types_to_infer): if node_class in ['Embedding']: return self._infer_embedding_precision(node, types_to_infer) - if node_class in ['SimpleRNN', 'LSTM', 'GRU', 'BidirectionalLSTM', 'BidirectionalGRU']: + if node_class in ['SimpleRNN', 'LSTM', 'GRU', 'Bidirectional']: return self._infer_rnn_precision(node, types_to_infer) if node_class in ['ParametrizedActivation']: @@ -554,8 +554,9 @@ def _infer_rnn_precision(self, node, types_to_infer): # for now just do the weights and leave the rest for the default catch rnn_weights = ('weight', 'bias', 'recurrent_weight', 'recurrent_bias') - if node.attributes['direction'] == 'bidirectional': - rnn_weights += ('weight_b', 'bias_b', 'recurrent_weight_b', 'recurrent_bias_b') + if node.class_name == 'Bidirectional': + rnn_weights = [direction + '_' + weight for direction in ['forward', 'backward'] for weight in rnn_weights] + for weightvar in rnn_weights: if f'{weightvar}_t' in types_to_infer: self._infer_default_type(node, f'{weightvar}_t') diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h index 785e7fede8..fbfbc0cec3 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h @@ -6,10 +6,11 @@ #include "nnet_common.h" #include "nnet_dense.h" #include "nnet_recr_activations.h" -#include namespace nnet { +// Struct for the LSTM template + struct lstm_config { // Internal data type definitions typedef float weight_t; @@ -37,32 +38,6 @@ struct lstm_config { template using activation = nnet::activation::relu; }; -struct bidirectionallstm_config : lstm_config { - // Internal data type definitions - typedef float weight_b_t; - typedef float recurrent_weight_b_t; - typedef float bias_b_t; - typedef float recurrent_bias_b_t; -}; - -template struct bidirectional_config { - // Layer Sizes - static const unsigned n_in = 2; - static const unsigned n_parts = 20; - static const unsigned n_out = 2; - static const unsigned table_size = 1024; - - // Resource reuse info - static const unsigned io_type = io_parallel; - static const unsigned reuse_factor = 1; - static const 
unsigned n_zeros = 0; - static const bool store_weights_in_bram = false; - static const bool use_static = true; - - // Layers info - static const RNNForward_config Forward; - static const RNNBackward_config Backward; -}; // Long Short term Memory NN (LSTM) // Resources: // https://github.com/nicodjimenez/lstm/blob/master/lstm.py @@ -180,18 +155,6 @@ void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate nnet::dense(h_state, tmpres_state, param_r, param_br); - /* - std::cout << " tmpres: "; - for (int i = 0; i < CONFIG_T::n_state*4; i++){ - std::cout << " " << tmpres[i]; - } - std::cout << std::endl; - std::cout << " tmpres_state: "; - for (int i = 0; i < CONFIG_T::n_state*4; i++){ - std::cout << " " << tmpres_state[i]; - } - std::cout << std::endl << std::endl; - */ for (int iacc = 0; iacc < (3 * CONFIG_T::n_state); iacc++) { #pragma HLS UNROLL int index = iacc; @@ -204,36 +167,14 @@ void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate int index = iacc + CONFIG_T::n_state * 2; inputacc_c[iacc] = tmpres[index] + tmpres_state[index]; } - /* - std::cout << " inputacc_ifo: "; - for (int i = 0; i < CONFIG_T::n_state*3; i++){ - std::cout << " " << inputacc_ifo[i]; - } - std::cout << std::endl; - std::cout << " inputacc_c: "; - for (int i = 0; i < CONFIG_T::n_state; i++){ - std::cout << " " << inputacc_c[i]; - } - std::cout << std::endl << std::endl; - */ + CONFIG_T::template activation_recr::activation(inputacc_ifo, tmpres_ifo); // Now for the confusion matrix CONFIG_T::template activation::activation(inputacc_c, tmpres_c); - /* - std::cout << " tmpres_ifo: "; - for (int i = 0; i < CONFIG_T::n_state*3; i++){ - std::cout << " " << tmpres_ifo[i]; - } - std::cout << std::endl; - std::cout << " tmpres_c: "; - for (int i = 0; i < CONFIG_T::n_state; i++){ - std::cout << " " << tmpres_c[i]; - } - std::cout << std::endl << std::endl; - */ + // Operation: s=g*i+sold*f (update state with buffer to avoid timing issues) for (int iacc = 0; iacc < (CONFIG_T::n_state); iacc++) { #pragma HLS UNROLL @@ -278,7 +219,8 @@ void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate } */ -template struct lstm_struct { +template class lstm_class { + public: static void apply(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_total[2 * CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], @@ -290,7 +232,8 @@ template s }; }; -template struct lstm_struct_static { +template class lstm_class_static { + public: static void apply(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_total[2 * CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], @@ -348,270 +291,6 @@ void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CO } } -template class RNNFunc_Forward, - template class RNNFunc_Backward> -void bidirectional_stack( - data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_out], - typename CONFIG_T::Forward::weight_t param[CONFIG_T::Forward::n_state * CONFIG_T::Forward::n_mult * CONFIG_T::n_in], - typename CONFIG_T::Forward::recurrent_weight_t - param_r[CONFIG_T::Forward::n_state * CONFIG_T::Forward::n_mult * CONFIG_T::Forward::n_state], - typename CONFIG_T::Forward::bias_t param_b[CONFIG_T::Forward::n_state * 
CONFIG_T::Forward::n_mult], - typename CONFIG_T::Forward::recurrent_bias_t param_br[CONFIG_T::Forward::n_state * CONFIG_T::Forward::n_mult], - typename CONFIG_T::Backward::weight_t - param_back[CONFIG_T::Backward::n_state * CONFIG_T::Backward::n_mult * CONFIG_T::n_in], - typename CONFIG_T::Backward::recurrent_weight_t - param_r_back[CONFIG_T::Backward::n_state * CONFIG_T::Backward::n_mult * CONFIG_T::Backward::n_state], - typename CONFIG_T::Backward::bias_t param_b_back[CONFIG_T::Backward::n_state * CONFIG_T::Backward::n_mult], - typename CONFIG_T::Backward::recurrent_bias_t param_br_back[CONFIG_T::Backward::n_state * CONFIG_T::Backward::n_mult]) { - - res_T h_newstate[(CONFIG_T::Forward::n_mult - 2) * CONFIG_T::Forward::n_state]; - res_T h_newstate_back[(CONFIG_T::Backward::n_mult - 2) * CONFIG_T::Backward::n_state]; - data_T data_in[CONFIG_T::n_in]; - data_T data_in_back[CONFIG_T::n_in]; - bool reset_state = true; - - #pragma HLS ARRAY_PARTITION variable=h_newstate complete - #pragma HLS ARRAY_PARTITION variable=s_newstate complete - #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete - #pragma HLS ARRAY_PARTITION variable=s_newstate_back complete - - for (int ii = 0; ii < (CONFIG_T::Forward::n_mult - 2) * CONFIG_T::Forward::n_state; ii++) { - #pragma HLS UNROLL - h_newstate[ii] = 0; - } - for (int ii = 0; ii < (CONFIG_T::Backward::n_mult - 2) * CONFIG_T::Backward::n_state; ii++) { - #pragma HLS UNROLL - h_newstate_back[ii] = 0; - } - - // std::cout << "Data_t size: " << data_T::size << std::endl; - /* - std::cout << " W: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_in; i_w++){ - std::cout << " " << param[i_w]; - } - std::cout << "\n WR: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_state; i_w++){ - std::cout << " " << param_r[i_w]; - } - std::cout << "\n B: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ - std::cout << " " << param_b[i_w]; - } - std::cout << "\n BR: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ - std::cout << " " << param_br[i_w]; - } - std::cout << "\n BW: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_in; i_w++){ - std::cout << " " << param_back[i_w]; - } - std::cout << "\n W_B: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_state; i_w++){ - std::cout << " " << param_r_back[i_w]; - } - std::cout << "\n B_B: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ - std::cout << " " << param_b_back[i_w]; - } - std::cout << "\n BR_B:" << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ - std::cout << " " << param_br_back[i_w]; - } - std::cout << std::endl << std::endl; - - std::cout << " States:" << std::endl << " "; - - std::cout << " " << 0 <<":"; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate[k]; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate[k]; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate_back[k] ; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate_back[k]; - std::cout << std::endl << std::endl; - */ - for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { - for (int j = 0; j < CONFIG_T::n_in; j++) { - #pragma HLS UNROLL - data_in[j] = data[j + iloop * CONFIG_T::n_in]; - data_in_back[j] = 
data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; - } - RNNFunc_Forward::apply(reset_state, data_in, h_newstate, param, param_r, param_b, - param_br); - RNNFunc_Backward::apply(reset_state, data_in_back, h_newstate_back, param_back, - param_r_back, param_b_back, param_br_back); - /* - std::cout << " " << iloop+1 <<":"; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate[k]; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate[k]; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate_back[k] ; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate_back[k]; - std::cout << std::endl << std::endl; - */ - if (CONFIG_T::n_sequence_out > 1) { - for (int i = (CONFIG_T::Forward::n_state + CONFIG_T::Backward::n_state) * iloop, j = 0; - i < (CONFIG_T::Forward::n_state + CONFIG_T::Backward::n_state) * iloop + CONFIG_T::Forward::n_state; - i++, j++) { - #pragma HLS UNROLL - res[i] = h_newstate[j]; - } - for (int i = (CONFIG_T::Forward::n_state + CONFIG_T::Backward::n_state) * iloop + CONFIG_T::Forward::n_state, - j = 0; - i < (CONFIG_T::Forward::n_state + CONFIG_T::Backward::n_state) * (iloop + 1); i++, j++) { - #pragma HLS UNROLL - res[i] = h_newstate_back[j]; - } - } - reset_state = false; - } - if (CONFIG_T::n_sequence_out == 1) { - for (int i = 0; i < (CONFIG_T::Forward::n_state); i++) { - #pragma HLS UNROLL - res[i] = h_newstate[i]; - } - for (int i = 0; i < (CONFIG_T::Backward::n_state); i++) { - #pragma HLS UNROLL - res[i + CONFIG_T::Forward::n_state] = h_newstate_back[i]; - } - } -} - -template -void bidirectionallstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], - res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], - typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], - typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { - - res_T h_newstate[CONFIG_T::n_state]; - res_T s_newstate[CONFIG_T::n_state]; - data_T data_in[CONFIG_T::n_in]; - res_T h_newstate_back[CONFIG_T::n_state]; - res_T s_newstate_back[CONFIG_T::n_state]; - data_T data_in_back[CONFIG_T::n_in]; - bool reset_state = true; - - #pragma HLS ARRAY_PARTITION variable=h_newstate complete - #pragma HLS ARRAY_PARTITION variable=s_newstate complete - #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete - #pragma HLS ARRAY_PARTITION variable=s_newstate_back complete - - for (int ii = 0; ii < CONFIG_T::n_state; ii++) { - #pragma HLS UNROLL - h_newstate[ii] = 0; - s_newstate[ii] = 0; - h_newstate_back[ii] = 0; - s_newstate_back[ii] = 0; - } - - // std::cout << "Data_t size: " << data_T::size << std::endl; - /* - std::cout << " W: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_in; i_w++){ - std::cout << " " << param[i_w]; - } - std::cout << "\n WR: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_state; i_w++){ - std::cout << " " << 
param_r[i_w]; - } - std::cout << "\n B: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ - std::cout << " " << param_b[i_w]; - } - std::cout << "\n BR: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ - std::cout << " " << param_br[i_w]; - } - std::cout << "\n BW: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_in; i_w++){ - std::cout << " " << param_back[i_w]; - } - std::cout << "\n W_B: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4 * CONFIG_T::n_state; i_w++){ - std::cout << " " << param_r_back[i_w]; - } - std::cout << "\n B_B: " << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ - std::cout << " " << param_b_back[i_w]; - } - std::cout << "\n BR_B:" << std::endl << " "; - for (int i_w=0; i_w < CONFIG_T::n_state * 4; i_w++){ - std::cout << " " << param_br_back[i_w]; - } - std::cout << std::endl << std::endl; - - std::cout << " States:" << std::endl << " "; - - std::cout << " " << 0 <<":"; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate[k]; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate[k]; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate_back[k] ; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate_back[k]; - std::cout << std::endl << std::endl; - */ - for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { - for (int j = 0; j < CONFIG_T::n_in; j++) { - #pragma HLS UNROLL - data_in[j] = data[j + iloop * CONFIG_T::n_in]; - data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; - } - if (CONFIG_T::use_static) { - nnet::lstm_static(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, - param_br); - nnet::lstm_static(reset_state, data_in_back, h_newstate_back, s_newstate_back, - param_back, param_r_back, param_b_back, param_br_back); - } else { - nnet::lstm(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, - param_br); - nnet::lstm(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, - param_r_back, param_b_back, param_br_back); - } - /* - std::cout << " " << iloop+1 <<":"; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate[k]; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate[k]; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << h_newstate_back[k] ; - std::cout << std::endl << " "; - for(int k = 0; k < CONFIG_T::n_state; k++) std::cout << " " << s_newstate_back[k]; - std::cout << std::endl << std::endl; - */ - if (CONFIG_T::n_sequence_out > 1) { - for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { - #pragma HLS UNROLL - res[i] = h_newstate[j]; - } - for (int i = CONFIG_T::n_state * (2 * (CONFIG_T::n_sequence - iloop) - 1), j = 0; - i < CONFIG_T::n_state * 2 * (CONFIG_T::n_sequence - iloop); i++, j++) { - #pragma HLS UNROLL - res[i] = h_newstate_back[j]; - } - } - reset_state = false; - } - if (CONFIG_T::n_sequence_out == 1) - for (int i = 0; i < (CONFIG_T::n_state); i++) { - #pragma HLS UNROLL - res[i] = h_newstate[i]; - res[i + CONFIG_T::n_state] = h_newstate_back[i]; - } -} - template void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T 
h_newstate[CONFIG_T::n_state], s_T s_newstate[CONFIG_T::n_state], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_state], @@ -647,63 +326,6 @@ void lstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newsta } } -template -void bidirectionallstm_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_newstate[CONFIG_T::n_state], - s_T s_newstate[CONFIG_T::n_state], h_T h_newstate_back[CONFIG_T::n_state], - s_T s_newstate_back[CONFIG_T::n_state], - res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], - typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], - typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { - - data_T data_in[CONFIG_T::n_in]; - data_T data_in_back[CONFIG_T::n_in]; - bool reset_state = false; - - std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl << std::endl; - std::cout << "Data_t size: " << data_T::size << std::endl; - std::cout << std::endl << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl << std::endl; - - #pragma HLS ARRAY_PARTITION variable=h_newstate complete - #pragma HLS ARRAY_PARTITION variable=s_newstate complete - #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete - #pragma HLS ARRAY_PARTITION variable=s_newstate_back complete - - for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { - for (int j = 0; j < CONFIG_T::n_in; j++) { - #pragma HLS UNROLL - data_in[j] = data[j + iloop * CONFIG_T::n_in]; - data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; - } - nnet::lstm(reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, param_br); - nnet::lstm(reset_state, data_in_back, h_newstate_back, s_newstate_back, param_back, - param_r_back, param_b_back, param_br_back); - if (CONFIG_T::n_sequence_out > 1) { - for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { - #pragma HLS UNROLL - res[i] = h_newstate[j]; - } - for (int i = CONFIG_T::n_state * (2 * (CONFIG_T::n_sequence - iloop) - 1), j = 0; - i < CONFIG_T::n_state * 2 * (CONFIG_T::n_sequence - iloop); i++, j++) { - #pragma HLS UNROLL - res[i] = h_newstate_back[j]; - } - } - reset_state = false; - } - if (CONFIG_T::n_sequence_out == 1) - for (int i = 0; i < (CONFIG_T::n_state); i++) { - #pragma HLS UNROLL - res[i] = h_newstate[i]; - res[i + CONFIG_T::n_state] = h_newstate_back[i]; - } -} - template void lstm_stack(hls::stream &data_stream, hls::stream &res_stream, typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], @@ -771,82 +393,6 @@ void lstm_stack(hls::stream &data_stream, hls::stream &res_stream } } -/* BiDirectional LSTM io_stream implementation: not implemented yet -template -void bidirectionallstm_stack(hls::stream &data_stream, hls::stream &res_stream, - typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], - typename 
CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], - typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_b_t param_r_back[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 4]) { - - typename res_T::value_type h_newstate[CONFIG_T::n_state]; - typename res_T::value_type s_newstate[CONFIG_T::n_state]; - typename res_T::value_type h_newstate_back[CONFIG_T::n_state]; - typename res_T::value_type s_newstate_back[CONFIG_T::n_state]; - #pragma HLS ARRAY_PARTITION variable=h_newstate complete - #pragma HLS ARRAY_PARTITION variable=s_newstate complete - #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete - #pragma HLS ARRAY_PARTITION variable=s_newstate_back complete - - for (int ii = 0; ii < CONFIG_T::n_state; ii++) { - #pragma HLS UNROLL - h_newstate[ii] = 0; - s_newstate[ii] = 0; - h_newstate_back[ii] = 0; - s_newstate_back[ii] = 0; - } - - typename data_T::value_type data_in[CONFIG_T::n_in]; - typename data_T::value_type data_in_back[CONFIG_T::n_in]; - bool reset_state = true; - -DataPropagation: - for (int i_in = 0; i_in < CONFIG_T::n_sequence * CONFIG_T::n_in / data_T::size; i_in++) { - if (CONFIG_T::n_sequence * CONFIG_T::n_in / data_T::size > 1) { - // #pragma HLS PIPELINE - } - data_T data_pack = data_stream.read(); - DataPack: - for (int i_pack = 0; i_pack < data_T::size; i_pack++) { - #pragma HLS UNROLL - data_in[i_pack] = data_pack[i_pack]; - } - if (CONFIG_T::use_static) - nnet::lstm_static( - reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, param_br); - else - nnet::lstm( - reset_state, data_in, h_newstate, s_newstate, param, param_r, param_b, param_br); - if (CONFIG_T::n_sequence_out > 1) { - res_T res_pack; - PRAGMA_DATA_PACK(res_pack) - ResPack_sequences: - for (int i_pack = 0; i_pack < res_T::size; i_pack++) { - #pragma HLS UNROLL - res_pack[i_pack] = h_newstate[i_pack]; - } - res_stream.write(res_pack); - } - reset_state = false; - } - - if (CONFIG_T::n_sequence_out == 1) { - res_T res_pack; - PRAGMA_DATA_PACK(res_pack) - ResPack: - for (int i_pack = 0; i_pack < res_T::size; i_pack++) { - #pragma HLS UNROLL - res_pack[i_pack] = h_newstate[i_pack]; - } - res_stream.write(res_pack); - } -} -*/ - // Struct for the GRU template struct gru_config { @@ -877,14 +423,6 @@ struct gru_config { template using activation = nnet::activation::relu; }; -struct bidirectionalgru_config : gru_config { - // Internal data type definitions - typedef float weight_b_t; - typedef float recurrent_weight_b_t; - typedef float bias_b_t; - typedef float recurrent_bias_b_t; -}; - template void gru(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], // TODO - Check the layout of the param @@ -1062,7 +600,7 @@ void gru_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[ } */ -template struct gru_struct { +template struct gru_class { static void apply(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_state[CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], @@ -1072,10 +610,10 @@ template s }; }; -template struct gru_struct_static { +template struct gru_class_static { static void apply(bool reset_state, data_T 
data[CONFIG_T::n_in], res_T h_state[CONFIG_T::n_state], typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 3 * CONFIG_T::n_state], + typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3]) { nnet::gru_static(reset_state, data, h_state, param, param_zr, param_b, param_br); @@ -1123,70 +661,6 @@ void gru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CON } } -template -void bidirectionalgru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], - res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], - typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], - typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], - typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3], - typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 3 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_b_t param_zr_back[CONFIG_T::n_state * 3 * CONFIG_T::n_state], - typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 3], - typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 3]) { - - res_T h_state[CONFIG_T::n_state]; - data_T data_in[CONFIG_T::n_in]; - res_T h_state_back[CONFIG_T::n_state]; - data_T data_in_back[CONFIG_T::n_in]; - bool reset_state = true; - - #pragma HLS ARRAY_PARTITION variable=h_state complete - #pragma HLS ARRAY_PARTITION variable=data_in complete - #pragma HLS ARRAY_PARTITION variable=h_state_back complete - #pragma HLS ARRAY_PARTITION variable=data_in_back complete - - for (int ii = 0; ii < CONFIG_T::n_state; ii++) { - #pragma HLS UNROLL - h_state[ii] = 0; - h_state_back[ii] = 0; - } - for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { - for (int j = 0; j < CONFIG_T::n_in; j++) { - #pragma HLS UNROLL - data_in[j] = data[j + iloop * CONFIG_T::n_in]; - data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; - } - if (CONFIG_T::use_static) { - nnet::gru_static(reset_state, data_in, h_state, param, param_zr, param_b, param_br); - nnet::gru_static(reset_state, data_in_back, h_state_back, param_back, param_zr_back, - param_b_back, param_br_back); - } else { - nnet::gru(reset_state, data_in, h_state, param, param_zr, param_b, param_br); - nnet::gru(reset_state, data_in_back, h_state_back, param_back, param_zr_back, - param_b_back, param_br_back); - } - if (CONFIG_T::n_sequence_out > 1) { - for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { - #pragma HLS UNROLL - res[i] = h_state[j]; - } - for (int i = CONFIG_T::n_state * (2 * (CONFIG_T::n_sequence - iloop) - 1), j = 0; - i < CONFIG_T::n_state * 2 * (CONFIG_T::n_sequence - iloop); i++, j++) { - #pragma HLS UNROLL - res[i] = h_state_back[j]; - } - } - reset_state = false; - } - if (CONFIG_T::n_sequence_out == 1) - for (int i = 0; i < (CONFIG_T::n_state); i++) { - #pragma HLS UNROLL - res[i] = h_state[i]; - res[i + CONFIG_T::n_state] = h_state_back[i]; - } -} - template void gru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_state[CONFIG_T::n_state], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_state], @@ -1222,57 +696,6 @@ void gru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T 
h_state[C } } -template -void bidirectionalgru_stack(data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], h_T h_state[CONFIG_T::n_state], - h_T h_state_back[CONFIG_T::n_state], res_T res[CONFIG_T::n_sequence_out * 2 * CONFIG_T::n_state], - typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], - typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], - typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3], - typename CONFIG_T::weight_b_t param_back[CONFIG_T::n_state * 3 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_b_t param_zr_back[CONFIG_T::n_state * 3 * CONFIG_T::n_state], - typename CONFIG_T::bias_b_t param_b_back[CONFIG_T::n_state * 3], - typename CONFIG_T::recurrent_bias_b_t param_br_back[CONFIG_T::n_state * 3]) { - - data_T data_in[CONFIG_T::n_in]; - data_T data_in_back[CONFIG_T::n_in]; - bool reset_state = false; - - #pragma HLS ARRAY_PARTITION variable=h_state complete - #pragma HLS ARRAY_PARTITION variable=data_in complete - #pragma HLS ARRAY_PARTITION variable=h_state_back complete - #pragma HLS ARRAY_PARTITION variable=data_in_back complete - - for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { - for (int j = 0; j < CONFIG_T::n_in; j++) { - #pragma HLS UNROLL - data_in[j] = data[j + iloop * CONFIG_T::n_in]; - data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; - } - nnet::gru(reset_state, data_in, h_state, param, param_zr, param_b, param_br); - nnet::gru(reset_state, data_in_back, h_state_back, param_back, param_zr_back, param_b_back, - param_br_back); - if (CONFIG_T::n_sequence_out > 1) { - for (int i = CONFIG_T::n_state * 2 * iloop, j = 0; i < (CONFIG_T::n_state * (2 * iloop + 1)); i++, j++) { - #pragma HLS UNROLL - res[i] = h_state[j]; - } - for (int i = CONFIG_T::n_state * (2 * (CONFIG_T::n_sequence - iloop) - 1), j = 0; - i < CONFIG_T::n_state * 2 * (CONFIG_T::n_sequence - iloop); i++, j++) { - #pragma HLS UNROLL - res[i] = h_state_back[j]; - } - } - reset_state = false; - } - if (CONFIG_T::n_sequence_out == 1) - for (int i = 0; i < (CONFIG_T::n_state); i++) { - #pragma HLS UNROLL - res[i] = h_state[i]; - res[i + CONFIG_T::n_state] = h_state_back[i]; - } -} - template void gru_stack(hls::stream &data_stream, hls::stream &res_stream, typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], @@ -1332,6 +755,132 @@ void gru_stack(hls::stream &data_stream, hls::stream &res_stream, } } +// Struct for the Bidirectional template + +struct single_layer_config { + // Internal data type definitions + typedef float weight_t; + typedef float recurrent_weight_t; + typedef float bias_t; + typedef float recurrent_bias_t; + typedef float accum_t; + + // Layer Sizes + static const unsigned n_in = 2; + static const unsigned n_state = 2; + static const unsigned n_mult = 3; + static const unsigned table_size = 1024; + + template using activation_recr = nnet::activation::relu; + template using activation = nnet::activation::relu; +}; + +struct bidirectional_config { + // Layer Sizes + static const unsigned n_in = 2; + static const unsigned n_parts = 20; + static const unsigned n_out = 2; + static const unsigned table_size = 1024; + + // Resource reuse info + static const unsigned io_type = io_parallel; + static const unsigned reuse_factor = 1; + static const unsigned n_zeros = 0; + static const bool store_weights_in_bram = false; + static const bool use_static = true; + + // Layers info + + template + 
using RNNfunc_forward = nnet::lstm_class; + template + using RNNfunc_backward = nnet::lstm_class; +}; + +template +void bidirectional_stack( + data_T data[CONFIG_T::n_sequence * CONFIG_T::n_in], res_T res[CONFIG_T::n_sequence_out * CONFIG_T::n_out], + typename CONFIG_T::FORWARD_CONFIG::weight_t + param[CONFIG_T::FORWARD_CONFIG::n_state * CONFIG_T::FORWARD_CONFIG::n_mult * CONFIG_T::n_in], + typename CONFIG_T::FORWARD_CONFIG::recurrent_weight_t + param_r[CONFIG_T::FORWARD_CONFIG::n_state * CONFIG_T::FORWARD_CONFIG::n_mult * CONFIG_T::FORWARD_CONFIG::n_state], + typename CONFIG_T::FORWARD_CONFIG::bias_t param_b[CONFIG_T::FORWARD_CONFIG::n_state * CONFIG_T::FORWARD_CONFIG::n_mult], + typename CONFIG_T::FORWARD_CONFIG::recurrent_bias_t + param_br[CONFIG_T::FORWARD_CONFIG::n_state * CONFIG_T::FORWARD_CONFIG::n_mult], + typename CONFIG_T::BACKWARD_CONFIG::weight_t + param_back[CONFIG_T::BACKWARD_CONFIG::n_state * CONFIG_T::BACKWARD_CONFIG::n_mult * CONFIG_T::n_in], + typename CONFIG_T::BACKWARD_CONFIG::recurrent_weight_t + param_r_back[CONFIG_T::BACKWARD_CONFIG::n_state * CONFIG_T::BACKWARD_CONFIG::n_mult * + CONFIG_T::BACKWARD_CONFIG::n_state], + typename CONFIG_T::BACKWARD_CONFIG::bias_t + param_b_back[CONFIG_T::BACKWARD_CONFIG::n_state * CONFIG_T::BACKWARD_CONFIG::n_mult], + typename CONFIG_T::BACKWARD_CONFIG::recurrent_bias_t + param_br_back[CONFIG_T::BACKWARD_CONFIG::n_state * CONFIG_T::BACKWARD_CONFIG::n_mult]) { + + res_T h_newstate[(CONFIG_T::FORWARD_CONFIG::n_mult - 2) * CONFIG_T::FORWARD_CONFIG::n_state]; + res_T h_newstate_back[(CONFIG_T::BACKWARD_CONFIG::n_mult - 2) * CONFIG_T::BACKWARD_CONFIG::n_state]; + data_T data_in[CONFIG_T::n_in]; + data_T data_in_back[CONFIG_T::n_in]; + bool reset_state = true; + + #pragma HLS ARRAY_PARTITION variable=h_newstate complete + #pragma HLS ARRAY_PARTITION variable=h_newstate_back complete + + for (int ii = 0; ii < (CONFIG_T::FORWARD_CONFIG::n_mult - 2) * CONFIG_T::FORWARD_CONFIG::n_state; ii++) { + #pragma HLS UNROLL + h_newstate[ii] = 0; + } + for (int ii = 0; ii < (CONFIG_T::BACKWARD_CONFIG::n_mult - 2) * CONFIG_T::BACKWARD_CONFIG::n_state; ii++) { + #pragma HLS UNROLL + h_newstate_back[ii] = 0; + } + + for (int iloop = 0; iloop < CONFIG_T::n_sequence; iloop++) { + for (int j = 0; j < CONFIG_T::n_in; j++) { + #pragma HLS UNROLL + data_in[j] = data[j + iloop * CONFIG_T::n_in]; + data_in_back[j] = data[j + (CONFIG_T::n_sequence - iloop - 1) * CONFIG_T::n_in]; + } + + CONFIG_T::template RNNfunc_forward::apply( + reset_state, data_in, h_newstate, param, param_r, param_b, param_br); + CONFIG_T::template RNNfunc_backward::apply( + reset_state, data_in_back, h_newstate_back, param_back, param_r_back, param_b_back, param_br_back); + + if (CONFIG_T::n_sequence_out > 1) { + for (int i = (CONFIG_T::FORWARD_CONFIG::n_state + CONFIG_T::BACKWARD_CONFIG::n_state) * iloop, j = 0; + i < (CONFIG_T::FORWARD_CONFIG::n_state + CONFIG_T::BACKWARD_CONFIG::n_state) * iloop + + CONFIG_T::FORWARD_CONFIG::n_state; + i++, j++) { + #pragma HLS UNROLL + res[i] = h_newstate[j]; + } + for (int i = (CONFIG_T::FORWARD_CONFIG::n_state + CONFIG_T::BACKWARD_CONFIG::n_state) * + (CONFIG_T::n_sequence - iloop) - + CONFIG_T::BACKWARD_CONFIG::n_state, + j = 0; + i < + (CONFIG_T::FORWARD_CONFIG::n_state + CONFIG_T::BACKWARD_CONFIG::n_state) * (CONFIG_T::n_sequence - iloop); + i++, j++) { + #pragma HLS UNROLL + res[i] = h_newstate_back[j]; + } + } + reset_state = false; + } + + if (CONFIG_T::n_sequence_out == 1) { + for (int i = 0; i < (CONFIG_T::FORWARD_CONFIG::n_state); i++) { + 
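        // Descriptive note (not in the original patch): with n_sequence_out == 1 (return_sequences=False)
        // only the final step is emitted, forward states first and backward states concatenated after them.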
#pragma HLS UNROLL + res[i] = h_newstate[i]; + } + for (int i = 0; i < (CONFIG_T::BACKWARD_CONFIG::n_state); i++) { + #pragma HLS UNROLL + res[i + CONFIG_T::FORWARD_CONFIG::n_state] = h_newstate_back[i]; + } + } +} + } // namespace nnet #endif From dd4f2205f5412bf75d378832fd5be62b8b4ffc62 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Wed, 11 Jun 2025 14:14:29 +0200 Subject: [PATCH 15/26] ADD support for reverse order layers --- hls4ml/converters/keras/recurrent.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hls4ml/converters/keras/recurrent.py b/hls4ml/converters/keras/recurrent.py index 6b2b5b6270..0171381ea4 100644 --- a/hls4ml/converters/keras/recurrent.py +++ b/hls4ml/converters/keras/recurrent.py @@ -118,12 +118,14 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade assert keras_layer['class_name'] == 'Bidirectional' rnn_forward_layer = keras_layer['config']['layer'] + swapped_order = False if keras_layer['config'].get('backward_layer'): rnn_backward_layer = keras_layer['config']['backward_layer'] if rnn_forward_layer['config']['go_backwards']: temp_layer = rnn_forward_layer.copy() rnn_forward_layer = rnn_backward_layer.copy() rnn_backward_layer = temp_layer + swapped_order = True else: rnn_backward_layer = rnn_forward_layer @@ -138,6 +140,7 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade layer['inputs'] = input_names layer['direction'] = 'bidirectional' + layer['swapped_order'] = swapped_order layer['return_sequences'] = rnn_forward_layer['config']['return_sequences'] layer['return_state'] = rnn_forward_layer['config']['return_state'] layer['time_major'] = rnn_forward_layer['config']['time_major'] if 'time_major' in rnn_forward_layer['config'] else False @@ -171,14 +174,17 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade cell_name = 'simple_rnn' else: cell_name = rnn_layer['class_name'].lower() + temp_dir = direction + if swapped_order: + temp_dir = 'backward' if direction == 'forward' else 'forward' layer[f'{direction}_weight_data'], layer[f'{direction}_recurrent_weight_data'], layer[f'{direction}_bias_data'] = ( get_weights_data( data_reader, layer['name'], [ - f'{direction}_{rnn_layer_name}/{cell_name}_cell/kernel', - f'{direction}_{rnn_layer_name}/{cell_name}_cell/recurrent_kernel', - f'{direction}_{rnn_layer_name}/{cell_name}_cell/bias', + f'{temp_dir}_{rnn_layer_name}/{cell_name}_cell/kernel', + f'{temp_dir}_{rnn_layer_name}/{cell_name}_cell/recurrent_kernel', + f'{temp_dir}_{rnn_layer_name}/{cell_name}_cell/bias', ], ) ) From de803b7e33926a976a8bb550a39a4ec86a1f2aa3 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Wed, 11 Jun 2025 14:15:47 +0200 Subject: [PATCH 16/26] ADD feature check for merge mode and layers order --- hls4ml/backends/vitis/passes/feature_check.py | 38 +++++++++++++++++++ hls4ml/backends/vitis/vitis_backend.py | 2 + 2 files changed, 40 insertions(+) diff --git a/hls4ml/backends/vitis/passes/feature_check.py b/hls4ml/backends/vitis/passes/feature_check.py index a38f6581f6..798499bcf6 100644 --- a/hls4ml/backends/vitis/passes/feature_check.py +++ b/hls4ml/backends/vitis/passes/feature_check.py @@ -49,3 +49,41 @@ def transform(self, model, node): f'WARNING: "ResourceUnrolled" strategy in "{node.name}" ({node.class_name}) may have unexpected II in' 'Vitis backend.\nVerify that the final design satisfies the latency/II constraints.' 
) + + +class ValidateBidirectionalMergeMode(OptimizerPass): + _unrolled_layer_cls = ['Bidirectional'] + + def match(self, node): + is_bidirectional_rnn_layer = ( + len([layer_cls for layer_cls in self._unrolled_layer_cls if layer_cls in node.class_name]) > 0 + ) + is_merge_mode_not_concat = node.get_attr('merge_mode', 'concat') != 'concat' + + return is_bidirectional_rnn_layer and is_merge_mode_not_concat + + def transform(self, model, node): + merge_mode = node.get_attr('merge_mode', 'concat') + print( + f'WARNING: "{merge_mode}" merge mode in "{node.name}" ({node.class_name}) is not supported in Vitis backend. ' + 'Switching to "concat" merge mode.' + ) + node.set_attr('merge_mode', 'concat') + + +class ValidateBidirectionalLayerOrder(OptimizerPass): + _unrolled_layer_cls = ['Bidirectional'] + + def match(self, node): + is_bidirectional_rnn_layer = ( + len([layer_cls for layer_cls in self._unrolled_layer_cls if layer_cls in node.class_name]) > 0 + ) + is_layer_order_swapped = node.get_attr('swapped_order', False) + + return is_bidirectional_rnn_layer and is_layer_order_swapped + + def transform(self, model, node): + print( + f'WARNING: The selected order for forward and backward layers in "{node.name}" ({node.class_name}) is not ' + 'supported in Vitis backend. Switching to forward layer first, backward layer last.' + ) diff --git a/hls4ml/backends/vitis/vitis_backend.py b/hls4ml/backends/vitis/vitis_backend.py index 5c63075a46..337f0feafe 100644 --- a/hls4ml/backends/vitis/vitis_backend.py +++ b/hls4ml/backends/vitis/vitis_backend.py @@ -17,6 +17,8 @@ def _register_flows(self): 'vitis:validate_conv_implementation', 'vitis:validate_resource_strategy', 'vitis:validate_resource_unrolled_strategy', + 'vitis:validate_bidirectional_merge_mode', + 'vitis:validate_bidirectional_layer_order', ] validation_flow = register_flow('validation', validation_passes, requires=['vivado:init_layers'], backend=self.name) From 070fdc290050497a2bd8d77a68e00dfb1713aa47 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Wed, 11 Jun 2025 15:01:18 +0200 Subject: [PATCH 17/26] ADD io type feature check --- hls4ml/backends/vitis/passes/feature_check.py | 18 ++++++++++++++++++ hls4ml/backends/vitis/vitis_backend.py | 1 + 2 files changed, 19 insertions(+) diff --git a/hls4ml/backends/vitis/passes/feature_check.py b/hls4ml/backends/vitis/passes/feature_check.py index 798499bcf6..dc5b883ff8 100644 --- a/hls4ml/backends/vitis/passes/feature_check.py +++ b/hls4ml/backends/vitis/passes/feature_check.py @@ -87,3 +87,21 @@ def transform(self, model, node): f'WARNING: The selected order for forward and backward layers in "{node.name}" ({node.class_name}) is not ' 'supported in Vitis backend. Switching to forward layer first, backward layer last.' ) + + +class ValidateBidirectionalIoType(OptimizerPass): + _unrolled_layer_cls = ['Bidirectional'] + + def match(self, node): + is_bidirectional_rnn_layer = ( + len([layer_cls for layer_cls in self._unrolled_layer_cls if layer_cls in node.class_name]) > 0 + ) + is_layer_io_type_stream = node.model.config.config['IOType'] != 'io_parallel' + + return is_bidirectional_rnn_layer and is_layer_io_type_stream + + def transform(self, model, node): + raise Exception( + f'WARNING: "{node.model.config.config["IOType"]}" IO Type is not supported in Vitis backend ' + f'for "{node.name}" ({node.class_name}). Please use "io_parallel".' 
+        )
diff --git a/hls4ml/backends/vitis/vitis_backend.py b/hls4ml/backends/vitis/vitis_backend.py
index 337f0feafe..06b7e1753a 100644
--- a/hls4ml/backends/vitis/vitis_backend.py
+++ b/hls4ml/backends/vitis/vitis_backend.py
@@ -19,6 +19,7 @@ def _register_flows(self):
             'vitis:validate_resource_unrolled_strategy',
             'vitis:validate_bidirectional_merge_mode',
             'vitis:validate_bidirectional_layer_order',
+            'vitis:validate_bidirectional_io_type',
         ]
         validation_flow = register_flow('validation', validation_passes, requires=['vivado:init_layers'], backend=self.name)

From b65c730dbfe02753d249211c4feea1cd44a4b8b4 Mon Sep 17 00:00:00 2001
From: Enrico Lupi
Date: Thu, 12 Jun 2025 15:36:43 +0200
Subject: [PATCH 18/26] FIX n_out in case of merge_mode != concat

---
 hls4ml/converters/keras/recurrent.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hls4ml/converters/keras/recurrent.py b/hls4ml/converters/keras/recurrent.py
index 0171381ea4..2f0479d1b1 100644
--- a/hls4ml/converters/keras/recurrent.py
+++ b/hls4ml/converters/keras/recurrent.py
@@ -200,7 +200,10 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade

         layer[f'{direction}_n_states'] = rnn_layer['config']['units']

-    layer['n_out'] = layer['forward_n_states'] + layer['backward_n_states']
+    if layer['merge_mode'] == 'concat':
+        layer['n_out'] = layer['forward_n_states'] + layer['backward_n_states']
+    else:
+        layer['n_out'] = layer['forward_n_states']

     if layer['return_sequences']:
         output_shape = [input_shapes[0][0], layer['n_timesteps'], layer['n_out']]

From b55cd04bab5939119429ecd56a10f5eb4ac02c9d Mon Sep 17 00:00:00 2001
From: Enrico Lupi
Date: Thu, 12 Jun 2025 15:39:18 +0200
Subject: [PATCH 19/26] ADD pytest for Bidirectional layer

---
 test/pytest/test_rnn.py | 257 +++++++++++++++++++++++-----------------
 1 file changed, 148 insertions(+), 109 deletions(-)

diff --git a/test/pytest/test_rnn.py b/test/pytest/test_rnn.py
index 85ea9eb8f2..daa95ac0b6 100644
--- a/test/pytest/test_rnn.py
+++ b/test/pytest/test_rnn.py
@@ -9,165 +9,204 @@

 test_root_path = Path(__file__).parent

-rnn_layers = [SimpleRNN, LSTM, GRU]
+rnn_layers = [SimpleRNN, LSTM, GRU, Bidirectional]


-@pytest.mark.parametrize('rnn_layer', rnn_layers)
-@pytest.mark.parametrize('return_sequences', [True, False])
-@pytest.mark.parametrize('bidirectional', [True, False])
-def test_rnn_parsing(rnn_layer, return_sequences, bidirectional):
-
-    if rnn_layer is SimpleRNN and bidirectional:
-        pytest.skip("SimpleRNN does not support bidirectional layers")
-
+def create_model_parsing(rnn_layer, return_sequences):
     time_steps = 3
     input_size = 8
     input_shape = (time_steps, input_size)

     model_input = Input(shape=input_shape)
-    if not bidirectional:
+    if rnn_layer.__name__ != 'Bidirectional':
         model_output = rnn_layer(64, return_sequences=return_sequences)(model_input)
     else:
-        model_output = Bidirectional(rnn_layer(64, return_sequences=return_sequences))(model_input)
+        forward_layer = LSTM(37, return_sequences=return_sequences)
+        backward_layer = GRU(27, return_sequences=return_sequences, go_backwards=True)
+        model_output = rnn_layer(forward_layer, backward_layer=backward_layer)(model_input)

     model = Model(model_input, model_output)
     model.compile(optimizer='adam', loss='mse')

-    config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vivado')
-    prj_name = f'hls4mlprj_rnn_{rnn_layer.__class__.__name__.lower()}_seq_{int(return_sequences)}'
-    output_dir = str(test_root_path / prj_name)
-    hls_model = 
hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir) + return model - hls_layer = list(hls_model.get_layers())[1] # 0 is input, 1 is the RNN layer - keras_layer = model.layers[1] - # Basic sanity check, I/O, activations - if not bidirectional: - assert hls_layer.class_name == rnn_layer.__name__ +def compare_attributes(hls_layer, keras_layer): + assert hls_layer.class_name == keras_layer.__class__.__name__ + assert hls_layer.get_input_variable().shape == list(keras_layer.input_shape)[1:] # Ignore the batch size + assert hls_layer.get_output_variable().shape == list(keras_layer.output_shape)[1:] # Ignore the batch size + if keras_layer.__class__.__name__ != 'Bidirectional': assert hls_layer.attributes['n_out'] == keras_layer.units assert hls_layer.attributes['activation'] == keras_layer.activation.__name__ if 'recurrent_activation' in hls_layer.attributes: # SimpleRNN doesn't have this assert hls_layer.attributes['recurrent_activation'] == keras_layer.recurrent_activation.__name__ - assert hls_layer.get_input_variable().shape == list(input_shape) - assert hls_layer.get_output_variable().shape == model_output.shape.as_list()[1:] # Ignore the batch size else: - assert hls_layer.class_name == 'Bidirectional' + rnn_layer.__name__ assert hls_layer.attributes['merge_mode'] == keras_layer.merge_mode - if hls_layer.attributes['merge_mode'] == 'concat': - assert hls_layer.attributes['n_out'] == 2 * keras_layer.forward_layer.units + n_out = 0 + for inner_layer, direction in [(keras_layer.forward_layer, 'forward'), (keras_layer.backward_layer, 'backward')]: + assert hls_layer.attributes[f'{direction}_n_states'] == inner_layer.units + if hls_layer.attributes['merge_mode'] == 'concat': + n_out += inner_layer.units + else: + n_out = inner_layer.units + assert hls_layer.attributes[f'{direction}_activation'] == inner_layer.activation.__name__ + if f'{direction}_recurrent_activation' in hls_layer.attributes: # SimpleRNN doesn't have this + assert hls_layer.attributes[f'{direction}_recurrent_activation'] == inner_layer.recurrent_activation.__name__ + assert hls_layer.attributes['n_out'] == n_out + + +def compare_weights(hls_weights, keras_weights, keras_layer): + def comparison(hls_weights, keras_weights, class_name): + assert hls_weights[0].data.shape == keras_weights[0].shape + assert hls_weights[1].data.shape == keras_weights[1].shape + if class_name == 'GRU': + # GRU has both bias and recurrent bias + assert hls_weights[2].data.shape == keras_weights[2][0].shape + assert hls_weights[3].data.shape == keras_weights[2][1].shape else: - assert hls_layer.attributes['n_out'] == keras_layer.forward_layer.units - assert hls_layer.attributes['activation'] == keras_layer.forward_layer.activation.__name__ - if 'recurrent_activation' in hls_layer.attributes: # SimpleRNN doesn't have this - assert hls_layer.attributes['recurrent_activation'] == keras_layer.forward_layer.recurrent_activation.__name__ - assert hls_layer.get_input_variable().shape == list(input_shape) - assert hls_layer.get_output_variable().shape == model_output.shape.as_list()[1:] # Ignore the batch size + # LSTM and SimpleRNN only have bias + assert hls_weights[2].data.shape == keras_weights[2].shape + + np.testing.assert_array_equal(hls_weights[0].data, keras_weights[0]) + np.testing.assert_array_equal(hls_weights[1].data, keras_weights[1]) + if class_name == 'GRU': + np.testing.assert_array_equal(hls_weights[2].data, keras_weights[2][0]) + np.testing.assert_array_equal(hls_weights[3].data, keras_weights[2][1]) 
+ else: + np.testing.assert_array_equal(hls_weights[2].data, keras_weights[2]) + + if keras_layer.__class__.__name__ != 'Bidirectional': + comparison(hls_weights, keras_weights, keras_layer.__class__.__name__) + else: + for i, inner_layer in enumerate([keras_layer.forward_layer, keras_layer.backward_layer]): + comparison(hls_weights[4 * i : 4 * (i + 1)], keras_weights[3 * i : 3 * (i + 1)], inner_layer.__class__.__name__) + + +@pytest.mark.parametrize('rnn_layer', rnn_layers) +@pytest.mark.parametrize('return_sequences', [True, False]) +def test_rnn_parsing(rnn_layer, return_sequences): + + model = create_model_parsing(rnn_layer, return_sequences) + + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vivado') + prj_name = f'hls4mlprj_rnn_{rnn_layer.__class__.__name__.lower()}_seq_{int(return_sequences)}' + output_dir = str(test_root_path / prj_name) + hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir) + + hls_layer = list(hls_model.get_layers())[1] # 0 is input, 1 is the RNN layer + keras_layer = model.layers[1] + + # Basic sanity check, I/O, activations + compare_attributes(hls_layer, keras_layer) # Compare weights hls_weights = list(hls_layer.get_weights()) # [weights, recurrent_weights, bias, recurrent_bias] - rnn_weights = keras_layer.get_weights() # [weights, recurrent_weights, bias] - - assert hls_weights[0].data.shape == rnn_weights[0].shape - assert hls_weights[1].data.shape == rnn_weights[1].shape - if 'gru' in rnn_layer.__name__.lower(): - # GRU has both bias and recurrent bias - assert hls_weights[2].data.shape == rnn_weights[2][0].shape - assert hls_weights[3].data.shape == rnn_weights[2][1].shape - else: - # LSTM and SimpleRNN only have bias - assert hls_weights[2].data.shape == rnn_weights[2].shape - - np.testing.assert_array_equal(hls_weights[0].data, rnn_weights[0]) - np.testing.assert_array_equal(hls_weights[1].data, rnn_weights[1]) - if 'gru' in rnn_layer.__name__.lower(): - np.testing.assert_array_equal(hls_weights[2].data, rnn_weights[2][0]) - np.testing.assert_array_equal(hls_weights[3].data, rnn_weights[2][1]) + keras_weights = keras_layer.get_weights() # [weights, recurrent_weights, bias] + compare_weights(hls_weights, keras_weights, keras_layer) + + +def create_model_accuracy(rnn_layer, return_sequences): + # Subtract 0.5 to include negative values + input_shape = (12, 8) + X = np.random.rand(50, *input_shape) - 0.5 + + layer_name = rnn_layer.__name__ + model = Sequential() + model.add(Input(shape=input_shape)) + if layer_name != 'Bidirectional': + test_layer = rnn_layer( + units=32, + input_shape=input_shape, + kernel_initializer='lecun_uniform', + recurrent_initializer='lecun_uniform', + bias_initializer='lecun_uniform', + return_sequences=return_sequences, + name=layer_name, + ) else: - np.testing.assert_array_equal(hls_weights[2].data, rnn_weights[2]) + test_layer = Bidirectional( + LSTM( + units=15, + input_shape=input_shape, + kernel_initializer='lecun_uniform', + recurrent_initializer='lecun_uniform', + bias_initializer='lecun_uniform', + return_sequences=return_sequences, + ), + backward_layer=GRU( + units=17, + input_shape=input_shape, + kernel_initializer='lecun_uniform', + recurrent_initializer='lecun_uniform', + bias_initializer='lecun_uniform', + return_sequences=return_sequences, + go_backwards=True, + ), + name=layer_name, + ) + model.add(test_layer) + model.compile() + return model, X @pytest.mark.parametrize( - 'rnn_layer, bidirectional, backend, io_type, strategy', 
+ 'rnn_layer, backend, io_type, strategy', [ - (SimpleRNN, False, 'Quartus', 'io_parallel', 'resource'), - (SimpleRNN, False, 'oneAPI', 'io_parallel', 'resource'), - (LSTM, False, 'Vivado', 'io_parallel', 'resource'), - (LSTM, False, 'Vivado', 'io_parallel', 'latency'), - (LSTM, False, 'Vitis', 'io_parallel', 'resource'), - (LSTM, False, 'Vitis', 'io_parallel', 'latency'), - (LSTM, True, 'Vivado', 'io_parallel', 'resource'), - (LSTM, True, 'Vivado', 'io_parallel', 'latency'), - (LSTM, True, 'Vitis', 'io_parallel', 'resource'), - (LSTM, True, 'Vitis', 'io_parallel', 'latency'), - (LSTM, False, 'Quartus', 'io_parallel', 'resource'), - (LSTM, False, 'oneAPI', 'io_parallel', 'resource'), - (LSTM, False, 'Vivado', 'io_stream', 'resource'), - (LSTM, False, 'Vivado', 'io_stream', 'latency'), - (LSTM, False, 'Vitis', 'io_stream', 'resource'), - (LSTM, False, 'Vitis', 'io_stream', 'latency'), - (GRU, False, 'Vivado', 'io_parallel', 'resource'), - (GRU, False, 'Vivado', 'io_parallel', 'latency'), - (GRU, False, 'Vitis', 'io_parallel', 'resource'), - (GRU, False, 'Vitis', 'io_parallel', 'latency'), - (GRU, True, 'Vivado', 'io_parallel', 'resource'), - (GRU, True, 'Vivado', 'io_parallel', 'latency'), - (GRU, True, 'Vitis', 'io_parallel', 'resource'), - (GRU, True, 'Vitis', 'io_parallel', 'latency'), - (GRU, False, 'Quartus', 'io_parallel', 'resource'), - (GRU, False, 'oneAPI', 'io_parallel', 'resource'), - (GRU, False, 'Vivado', 'io_stream', 'resource'), - (GRU, False, 'Vivado', 'io_stream', 'latency'), - (GRU, False, 'Vitis', 'io_stream', 'resource'), - (GRU, False, 'Vitis', 'io_stream', 'latency'), - (GRU, False, 'Quartus', 'io_stream', 'resource'), - (GRU, False, 'oneAPI', 'io_stream', 'resource'), + (SimpleRNN, 'Quartus', 'io_parallel', 'resource'), + (SimpleRNN, 'oneAPI', 'io_parallel', 'resource'), + (LSTM, 'Vivado', 'io_parallel', 'resource'), + (LSTM, 'Vivado', 'io_parallel', 'latency'), + (LSTM, 'Vitis', 'io_parallel', 'resource'), + (LSTM, 'Vitis', 'io_parallel', 'latency'), + (LSTM, 'Quartus', 'io_parallel', 'resource'), + (LSTM, 'oneAPI', 'io_parallel', 'resource'), + (LSTM, 'Vivado', 'io_stream', 'resource'), + (LSTM, 'Vivado', 'io_stream', 'latency'), + (LSTM, 'Vitis', 'io_stream', 'resource'), + (LSTM, 'Vitis', 'io_stream', 'latency'), + (GRU, 'Vivado', 'io_parallel', 'resource'), + (GRU, 'Vivado', 'io_parallel', 'latency'), + (GRU, 'Vitis', 'io_parallel', 'resource'), + (GRU, 'Vitis', 'io_parallel', 'latency'), + (GRU, 'Quartus', 'io_parallel', 'resource'), + (GRU, 'oneAPI', 'io_parallel', 'resource'), + (GRU, 'Vivado', 'io_stream', 'resource'), + (GRU, 'Vivado', 'io_stream', 'latency'), + (GRU, 'Vitis', 'io_stream', 'resource'), + (GRU, 'Vitis', 'io_stream', 'latency'), + (GRU, 'Quartus', 'io_stream', 'resource'), + (GRU, 'oneAPI', 'io_stream', 'resource'), + (Bidirectional, 'Vivado', 'io_parallel', 'resource'), + (Bidirectional, 'Vivado', 'io_parallel', 'latency'), + (Bidirectional, 'Vitis', 'io_parallel', 'resource'), + (Bidirectional, 'Vitis', 'io_parallel', 'latency'), ], ) @pytest.mark.parametrize('return_sequences', [True, False]) @pytest.mark.parametrize('static', [True, False]) -def test_rnn_accuracy(rnn_layer, bidirectional, return_sequences, backend, io_type, strategy, static): - # Subtract 0.5 to include negative values - input_shape = (12, 8) - X = np.random.rand(50, *input_shape) - 0.5 - - layer_name = ("Bidirectional" if bidirectional else "") + rnn_layer.__name__ - keras_model = Sequential() - keras_model.add(Input(shape=input_shape)) - test_layer = rnn_layer( - 
units=32, - input_shape=input_shape, - kernel_initializer='lecun_uniform', - recurrent_initializer='lecun_uniform', - bias_initializer='lecun_uniform', - return_sequences=return_sequences, - name=layer_name, - ) - if not bidirectional: - keras_model.add(test_layer) - else: - keras_model.add(Bidirectional(test_layer, name=layer_name)) +def test_rnn_accuracy(rnn_layer, return_sequences, backend, io_type, strategy, static): + layer_name = rnn_layer.__name__ - keras_model.compile() + model, X = create_model_accuracy(rnn_layer, return_sequences) default_precision = 'ap_fixed<32, 16>' if backend in ['Vivado', 'Vitis'] else 'ac_fixed<32, 16, true>' hls_config = hls4ml.utils.config_from_keras_model( - keras_model, granularity='name', default_precision=default_precision, backend=backend + model, granularity='name', default_precision=default_precision, backend=backend ) hls_config['LayerName'][layer_name]['static'] = static hls_config['LayerName'][layer_name]['Strategy'] = strategy prj_name = ( 'hls4mlprj_rnn_accuracy_' - + ('bidirectional_' if bidirectional else '') + f'{layer_name}_static_{int(static)}_ret_seq_{int(return_sequences)}_' - f'{backend}_{io_type}_{strategy}' + + f'{backend}_{io_type}_{strategy}' ) output_dir = str(test_root_path / prj_name) hls_model = hls4ml.converters.convert_from_keras_model( - keras_model, hls_config=hls_config, output_dir=output_dir, backend=backend, io_type=io_type + model, hls_config=hls_config, output_dir=output_dir, backend=backend, io_type=io_type ) hls_model.compile() - keras_prediction = keras_model.predict(X) + keras_prediction = model.predict(X) hls_prediction = hls_model.predict(X) np.testing.assert_allclose(hls_prediction.flatten(), keras_prediction.flatten(), rtol=0.0, atol=5e-2) From e8fae5488a1596dfac0daa75fe3cbdbde32fc659 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Thu, 12 Jun 2025 15:51:33 +0200 Subject: [PATCH 20/26] FIX posible directions for LSTM and GRU --- hls4ml/model/layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index d337bed68f..7dcd5f5b35 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1353,7 +1353,7 @@ class LSTM(Layer): Attribute('return_sequences', value_type=bool, default=False), Attribute('return_state', value_type=bool, default=False), Attribute('pass_initial_states', value_type=bool, default=False), - ChoiceAttribute('direction', ['forward', 'backward', 'bidirectional'], configurable=False, default='forward'), + ChoiceAttribute('direction', ['forward', 'backward'], configurable=False, default='forward'), Attribute('time_major', value_type=bool, default=False), WeightAttribute('weight'), WeightAttribute('bias'), @@ -1410,7 +1410,7 @@ class GRU(Layer): Attribute('return_sequences', value_type=bool, default=False), Attribute('return_state', value_type=bool, default=False), Attribute('pass_initial_states', value_type=bool, default=False), - ChoiceAttribute('direction', ['forward', 'backward', 'bidirectional'], configurable=False, default='forward'), + ChoiceAttribute('direction', ['forward', 'backward'], configurable=False, default='forward'), Attribute('time_major', value_type=bool, default=False), ChoiceAttribute('apply_reset_gate', ['before', 'after'], configurable=False, default='after'), WeightAttribute('weight'), From a1500e44198c7ab70ed387fb393db74776838cce Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Thu, 12 Jun 2025 16:18:15 +0200 Subject: [PATCH 21/26] FIX spelling mistake --- 
hls4ml/backends/vivado/passes/recurrent_templates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/backends/vivado/passes/recurrent_templates.py b/hls4ml/backends/vivado/passes/recurrent_templates.py index 1542500d62..bd72b0be5e 100644 --- a/hls4ml/backends/vivado/passes/recurrent_templates.py +++ b/hls4ml/backends/vivado/passes/recurrent_templates.py @@ -500,7 +500,7 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) - # TO DO: Add initial tates functions + # TO DO: Add initial states functions ''' if params['pass_initial_states'] == 'true': params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name From 1c16616058eadf1e74283e42f7358fb229cb75c6 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Thu, 12 Jun 2025 16:59:08 +0200 Subject: [PATCH 22/26] FIX order --- .../vivado/passes/recurrent_templates.py | 61 ++++++++++--------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/hls4ml/backends/vivado/passes/recurrent_templates.py b/hls4ml/backends/vivado/passes/recurrent_templates.py index bd72b0be5e..c005dce8d8 100644 --- a/hls4ml/backends/vivado/passes/recurrent_templates.py +++ b/hls4ml/backends/vivado/passes/recurrent_templates.py @@ -428,6 +428,37 @@ def format(self, node): return template.format(**params) +class BidirectionalFunctionTemplate(FunctionCallTemplate): + def __init__(self): + super().__init__((Bidirectional), include_header=recr_include_list) + + def format(self, node): + params = self._default_function_params(node) + + # TO DO: Add initial states functions + ''' + if params['pass_initial_states'] == 'true': + params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name + params['input2'] = node.get_input_variable(node.inputs[1]).name + if node.class_name == 'BLSTM': + params['input3'] = node.get_input_variable(node.inputs[2]).name + params['input3_t'] = node.get_input_variable(node.inputs[2]).type.name + ''' + + params['w'] = node.get_weights('forward_weight').name + params['b'] = node.get_weights('forward_bias').name + params['wr'] = node.get_weights('forward_recurrent_weight').name + params['br'] = node.get_weights('forward_recurrent_bias').name + params['w_b'] = node.get_weights('backward_weight').name + params['b_b'] = node.get_weights('backward_bias').name + params['wr_b'] = node.get_weights('backward_recurrent_weight').name + params['br_b'] = node.get_weights('backward_recurrent_bias').name + + template = bidirectional_function_template + + return template.format(**params) + + time_distributed_config_template = """struct config{index} : nnet::time_distributed_config {{ static const unsigned dim = {dim}; @@ -492,33 +523,3 @@ def format(self, node): return self.template_start.format(**params) else: return self.template_end.format(**params) - -class BidirectionalFunctionTemplate(FunctionCallTemplate): - def __init__(self): - super().__init__((Bidirectional), include_header=recr_include_list) - - def format(self, node): - params = self._default_function_params(node) - - # TO DO: Add initial states functions - ''' - if params['pass_initial_states'] == 'true': - params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name - params['input2'] = node.get_input_variable(node.inputs[1]).name - if node.class_name == 'BLSTM': - params['input3'] = node.get_input_variable(node.inputs[2]).name - params['input3_t'] = node.get_input_variable(node.inputs[2]).type.name - ''' - - params['w'] = node.get_weights('forward_weight').name - params['b'] = 
node.get_weights('forward_bias').name - params['wr'] = node.get_weights('forward_recurrent_weight').name - params['br'] = node.get_weights('forward_recurrent_bias').name - params['w_b'] = node.get_weights('backward_weight').name - params['b_b'] = node.get_weights('backward_bias').name - params['wr_b'] = node.get_weights('backward_recurrent_weight').name - params['br_b'] = node.get_weights('backward_recurrent_bias').name - - template = bidirectional_function_template - - return template.format(**params) From 2fc981cf99a916c402fc9dff2d6d790fb3742915 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Thu, 12 Jun 2025 17:10:52 +0200 Subject: [PATCH 23/26] FIX remove unused import --- hls4ml/converters/keras/recurrent.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hls4ml/converters/keras/recurrent.py b/hls4ml/converters/keras/recurrent.py index 2f0479d1b1..8b27c284e2 100644 --- a/hls4ml/converters/keras/recurrent.py +++ b/hls4ml/converters/keras/recurrent.py @@ -9,7 +9,6 @@ parse_default_keras_layer, parse_keras_model, ) -import numpy as np rnn_layers = ['SimpleRNN', 'LSTM', 'GRU'] merge_modes = ['sum', 'mul', 'concat', 'ave'] @@ -112,7 +111,7 @@ def parse_time_distributed_layer(keras_layer, input_names, input_shapes, data_re layer['output_shape'] = output_shape[1:] # Remove the batch dimension layer['n_time_steps'] = output_shape[1] - + @keras_handler('Bidirectional') def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reader): assert keras_layer['class_name'] == 'Bidirectional' From 48f4fe25f05762e8af4075868c41e22cfe3ca5a0 Mon Sep 17 00:00:00 2001 From: Enrico Lupi Date: Fri, 13 Jun 2025 10:34:51 +0200 Subject: [PATCH 24/26] FIX blank space --- hls4ml/backends/vivado/vivado_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index e9d2a80a2e..a577f35715 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -674,7 +674,7 @@ def init_time_distributed(self, layer): warn(f'Cannot unroll time step loop in layer "{layer.name}" while using "io_stream".') loop_mode = 'off' layer.set_attr('time_step_loop_parallelism', loop_mode) - + @layer_optimizer(Bidirectional) def init_bidirectional(self, layer): reuse_factor = layer.model.config.get_reuse_factor(layer) From 734d42fd58b9d964125775bbe543f0500f8b1a73 Mon Sep 17 00:00:00 2001 From: enlupi Date: Tue, 15 Jul 2025 11:29:28 +0200 Subject: [PATCH 25/26] RM old comments --- .../vivado/passes/recurrent_templates.py | 10 +--- .../vivado/nnet_utils/nnet_recurrent.h | 52 ------------------- 2 files changed, 1 insertion(+), 61 deletions(-) diff --git a/hls4ml/backends/vivado/passes/recurrent_templates.py b/hls4ml/backends/vivado/passes/recurrent_templates.py index c005dce8d8..6f03d674ad 100644 --- a/hls4ml/backends/vivado/passes/recurrent_templates.py +++ b/hls4ml/backends/vivado/passes/recurrent_templates.py @@ -435,15 +435,7 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) - # TO DO: Add initial states functions - ''' - if params['pass_initial_states'] == 'true': - params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name - params['input2'] = node.get_input_variable(node.inputs[1]).name - if node.class_name == 'BLSTM': - params['input3'] = node.get_input_variable(node.inputs[2]).name - params['input3_t'] = node.get_input_variable(node.inputs[2]).type.name - ''' + # TO DO: Add initial states functions for 
pytorch settings params['w'] = node.get_weights('forward_weight').name params['b'] = node.get_weights('forward_bias').name diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h index fbfbc0cec3..042a0325ee 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_recurrent.h @@ -192,33 +192,6 @@ void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate } } -/* Alternative lstm_static beginning -template -void lstm_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[CONFIG_T::n_state], - res_T s_newstate[CONFIG_T::n_state], - typename CONFIG_T::weight_t param[CONFIG_T::n_state * 4 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_r[CONFIG_T::n_state * 4 * CONFIG_T::n_state], - typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 4], - typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 4], - bool backward_selector=false) { - // Initialize the state variable -- will maintain state between function calls - - static res_T h_state_forward[CONFIG_T::n_state]; - static res_T s_state_forward[CONFIG_T::n_state]; - res_T *h_state; - res_T *s_state; - if constexpr (bidirectional) { - static res_T h_state_backward[CONFIG_T::n_state]; - static res_T s_state_backward[CONFIG_T::n_state]; - h_state = backward_selector ? h_state_backward : h_state_forward; - s_state = backward_selector ? s_state_backward : s_state_forward; - } - else { - h_state = h_state_forward; - s_state = s_state_forward; - } -*/ - template class lstm_class { public: static void apply(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_total[2 * CONFIG_T::n_state], @@ -347,10 +320,6 @@ void lstm_stack(hls::stream &data_stream, hls::stream &res_stream typename data_T::value_type data_in[CONFIG_T::n_in]; bool reset_state = true; - std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl << std::endl; - std::cout << "Data_t size: " << data_T::size << std::endl; - std::cout << std::endl << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl << std::endl; - DataPropagation: for (int i_in = 0; i_in < CONFIG_T::n_sequence * CONFIG_T::n_in / data_T::size; i_in++) { if (CONFIG_T::n_sequence * CONFIG_T::n_in / data_T::size > 1) { @@ -579,27 +548,6 @@ void gru_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[ } } -/* Alternative gru_static beginning -template -void gru_static(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_newstate[CONFIG_T::n_state], - typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in], - typename CONFIG_T::recurrent_weight_t param_zr[CONFIG_T::n_state * 3 * CONFIG_T::n_state], - typename CONFIG_T::bias_t param_b[CONFIG_T::n_state * 3], - typename CONFIG_T::recurrent_bias_t param_br[CONFIG_T::n_state * 3], - bool backward_selector=false) { - // Initialize the state variable -- will maintain state between function calls - - static res_T h_state_forward[CONFIG_T::n_state]; - res_T *h_state; - if constexpr (bidirectional) { - static res_T h_state_backward[CONFIG_T::n_state]; - h_state = backward_selector ? 
h_state_backward : h_state_forward;
-    }
-    else {
-        h_state = h_state_forward;
-    }
-*/
-
 template <class data_T, class res_T, typename CONFIG_T> struct gru_class {
     static void apply(bool reset_state, data_T data[CONFIG_T::n_in], res_T h_state[CONFIG_T::n_state],
                       typename CONFIG_T::weight_t param[CONFIG_T::n_state * 3 * CONFIG_T::n_in],

From 7c128f7bc2e445fe8db4f0569ca1ac82a53f2d19 Mon Sep 17 00:00:00 2001
From: enlupi
Date: Tue, 15 Jul 2025 11:30:07 +0200
Subject: [PATCH 26/26] MV check for out-of-order layers from passes to parsing

---
 hls4ml/backends/vitis/passes/feature_check.py | 18 ------------------
 hls4ml/converters/keras/recurrent.py          |  7 ++++---
 2 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/feature_check.py b/hls4ml/backends/vitis/passes/feature_check.py
index dc5b883ff8..48f87168bc 100644
--- a/hls4ml/backends/vitis/passes/feature_check.py
+++ b/hls4ml/backends/vitis/passes/feature_check.py
@@ -71,24 +71,6 @@ def transform(self, model, node):
         node.set_attr('merge_mode', 'concat')


-class ValidateBidirectionalLayerOrder(OptimizerPass):
-    _unrolled_layer_cls = ['Bidirectional']
-
-    def match(self, node):
-        is_bidirectional_rnn_layer = (
-            len([layer_cls for layer_cls in self._unrolled_layer_cls if layer_cls in node.class_name]) > 0
-        )
-        is_layer_order_swapped = node.get_attr('swapped_order', False)
-
-        return is_bidirectional_rnn_layer and is_layer_order_swapped
-
-    def transform(self, model, node):
-        print(
-            f'WARNING: The selected order for forward and backward layers in "{node.name}" ({node.class_name}) is not '
-            'supported in Vitis backend. Switching to forward layer first, backward layer last.'
-        )
-
-
 class ValidateBidirectionalIoType(OptimizerPass):
     _unrolled_layer_cls = ['Bidirectional']

diff --git a/hls4ml/converters/keras/recurrent.py b/hls4ml/converters/keras/recurrent.py
index 8b27c284e2..f27a970c54 100644
--- a/hls4ml/converters/keras/recurrent.py
+++ b/hls4ml/converters/keras/recurrent.py
@@ -11,7 +11,6 @@
 )

 rnn_layers = ['SimpleRNN', 'LSTM', 'GRU']
-merge_modes = ['sum', 'mul', 'concat', 'ave']


 @keras_handler(*rnn_layers)
@@ -125,6 +124,10 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade
             rnn_forward_layer = rnn_backward_layer.copy()
             rnn_backward_layer = temp_layer
             swapped_order = True
+            print(
+                f'WARNING: The selected order for forward and backward layers in "{keras_layer["config"]["name"]}" '
+                f'({keras_layer["class_name"]}) is not supported in Vitis backend. Switching to forward layer first, backward layer last.'
+            )
     else:
         rnn_backward_layer = rnn_forward_layer

@@ -139,7 +142,6 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade

     layer['inputs'] = input_names
     layer['direction'] = 'bidirectional'
-    layer['swapped_order'] = swapped_order
     layer['return_sequences'] = rnn_forward_layer['config']['return_sequences']
     layer['return_state'] = rnn_forward_layer['config']['return_state']
     layer['time_major'] = rnn_forward_layer['config']['time_major'] if 'time_major' in rnn_forward_layer['config'] else False
@@ -148,7 +150,6 @@ def parse_bidirectional_layer(keras_layer, input_names, input_shapes, data_reade
         raise Exception('Time-major format is not supported by hls4ml')
     layer['n_timesteps'] = input_shapes[0][1]
     layer['n_in'] = input_shapes[0][2]
-    assert keras_layer['config']['merge_mode'] in merge_modes
     layer['merge_mode'] = keras_layer['config']['merge_mode']

     for direction, rnn_layer in [('forward', rnn_forward_layer), ('backward', rnn_backward_layer)]: