Change the Conv1DTranspose weight input to be 2-dimensional (the stride-split kernel is now precomputed in the Python code and passed to the HLS functions)

Jonathan-Shoemaker · Jonathan-Shoemaker · commit 1c345f7eddff · 2022-10-12T22:34:04.000-05:00
diff --git a/hls4ml/backends/fpga/fpga_types.py b/hls4ml/backends/fpga/fpga_types.py
@@ -326,6 +326,15 @@ def __init__(self, type_converter):
 
 class StaticWeightVariableDefinition(VariableDefinition):
     def definition_cpp(self, name_suffix='', as_reference=False):
+        # When keep_dims > 0, declare a multi-dimensional array: the first keep_dims axes
+        # keep their own extents and all remaining axes are multiplied into one trailing extent.
+        if self.keep_dims > 0:
+            extents = [self.shape[axis] for axis in range(self.keep_dims)]
+            flattened = 1
+            for extent in self.shape[self.keep_dims:]:
+                flattened *= extent
+            size_str = ''.join('[{}]'.format(extent) for extent in extents + [flattened])
+            return '{type} {name}{sizes}'.format(type=self.type.name, name=self.name, sizes=size_str)
         return '{type} {name}[{size}]'.format(type=self.type.name, name=self.name, size=self.data_length)
 
 class StaticWeightVariableConverter(object):
diff --git a/hls4ml/backends/vivado/passes/convolution_templates.py b/hls4ml/backends/vivado/passes/convolution_templates.py
@@ -138,8 +138,6 @@ def format(self, node):
         params = self._default_config_params(node)
         params['dilation'] = node.get_attr('dilation', 1)
         params['nzeros'] = node.get_weights('weight').nzeros
-        params['trfilt_width'] = (node.get_attr('filt_width') + node.get_attr('stride_width') - 1) \
-             // node.get_attr('stride_width')
 
         params['config_t'] = 'config{}_mult'.format(node.index)
         if node.model.config.get_config_value('IOType') == 'io_parallel':
diff --git a/hls4ml/backends/vivado/passes/resource_strategy.py b/hls4ml/backends/vivado/passes/resource_strategy.py
@@ -20,7 +20,22 @@ def transform(self, model, node):
         elif isinstance(node, Conv1D):
             node.weights['weight'].data = np.transpose(node.weights['weight'].data, axes=[2, 0, 1]) #(W,C,F) => (F,W,C)
         elif isinstance(node, Conv1DTranspose):
-            node.weights['weight'].data = np.transpose(node.weights['weight'].data, axes=[1, 0, 2]) #(W,F,C) => (F,W,C)
+            pass
+        #     #(W,F,C) => (F,W,C)
+        #     node.weights['weight'].data = np.transpose(node.weights['weight'].data, axes=[1, 0, 2]) 
+        #     # now split the kernel into stride width kernels (F, W, C) -> (S, F, W/S, C)
+        #     n_filts, kern_width, n_chan = node.weights['weight'].data.shape 
+        #     new_weights = np.zeros((node.get_attr('stride_width'), n_filts, node.get_attr('trfilt_width'), n_chan))
+        #     for i_sw in range(node.get_attr('stride_width')):
+        #         for i_fw in range(node.get_attr('trfilt_width')):
+        #             filt_ind = i_sw + (node.get_attr('trfilt_width')-i_fw-1) * node.get_attr('stride_width')
+        #             for i_nf in range(n_filts):
+        #                 for i_nc in range(n_chan):
+        #                     if filt_ind < kern_width:
+        #                         new_weights[i_sw][i_nf][i_fw][i_nc] = \
+        #                             node.weights['weight'].data[i_nf][filt_ind][i_nc]
+        #     node.weights['weight'].data = new_weights
+        #     print("Updated shape:", node.weights['weight'].data.shape)
         elif isinstance(node, SeparableConv1D):
             node.weights['depthwise'].data = np.transpose(node.weights['depthwise'].data, axes=[2, 0, 1]) #(W,C,F) => (F,W,C)
             node.weights['pointwise'].data = np.transpose(node.weights['pointwise'].data, axes=[2, 0, 1]) #(W,C,F) => (F,W,C)
diff --git a/hls4ml/converters/keras/convolution.py b/hls4ml/converters/keras/convolution.py
@@ -51,6 +51,7 @@ def parse_conv1dtranspose_layer(keras_layer, input_names, input_shapes, data_rea
     layer['filt_width'] = keras_layer['config']['kernel_size'][0]
     layer['stride_width'] = keras_layer['config']['strides'][0]
     layer['padding'] = keras_layer['config']['padding']
+    layer['trfilt_width'] = (layer['filt_width'] + layer['stride_width'] - 1)//layer['stride_width']
 
     (
         layer['out_width'],
diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py
@@ -162,10 +162,10 @@ def add_output_variable(self, shape, dim_names, out_name=None, var_name='layer{i
 
         self.set_attr(out_name, out)
 
-    def add_weights(self, quantizer=None, compression=False):
+    def add_weights(self, quantizer=None, compression=False, keep_dims=0):
         data = self.model.get_weights_data(self.name, 'kernel')
 
-        self.add_weights_variable(name='weight', var_name='w{index}', data=data, quantizer=quantizer, compression=compression)
+        self.add_weights_variable(name='weight', var_name='w{index}', data=data, quantizer=quantizer, compression=compression, keep_dims=keep_dims)
 
     def add_bias(self, quantizer=None):
         data = self.model.get_weights_data(self.name, 'bias')
@@ -179,7 +179,7 @@ def add_bias(self, quantizer=None):
 
         self.add_weights_variable(name='bias', var_name='b{index}', type_name=type_name, precision=precision, data=data, quantizer=quantizer)
 
-    def add_weights_variable(self, name, var_name=None, type_name=None, precision=None, data=None, quantizer=None, compression=False):
+    def add_weights_variable(self, name, var_name=None, type_name=None, precision=None, data=None, quantizer=None, compression=False, keep_dims=0):
         if var_name is None:
             var_name = name + '{index}'
 
@@ -213,7 +213,7 @@ def add_weights_variable(self, name, var_name=None, type_name=None, precision=No
         elif exponent_type:
             var = ExponentWeightVariable(var_name, type_name=type_name, precision=precision, quantizer=quantizer, data=data, index=self.index)
         else:
-            var = WeightVariable(var_name, type_name=type_name, precision=precision, quantizer=quantizer, data=data, index=self.index)
+            var = WeightVariable(var_name, type_name=type_name, precision=precision, quantizer=quantizer, data=data, index=self.index, keep_dims=keep_dims)
 
         var.data_unquantized = data_unquantized
 
@@ -366,8 +366,28 @@ def initialize(self):
             shape = [self.attributes['n_filt'], self.attributes['out_width']]
             dims = ['N_FILT_{}'.format(self.index), 'N_OUTPUTS_{}'.format(self.index)]
 
+        data = self.model.get_weights_data(self.name, 'kernel')
+        # now we transform the entire kernel
+
+        #(W,F,C) => (F,W,C)
+        data = np.transpose(data, axes=[1, 0, 2]) 
+        # now split the kernel into stride width kernels (F, W, C) -> (S, F, W/S, C)
+        n_filts, kern_width, n_chan = data.shape
+        new_weights = np.zeros((self.attributes['stride_width'], n_filts, self.attributes['trfilt_width'], n_chan))
+        for i_sw in range(self.attributes['stride_width']):
+            for i_fw in range(self.attributes['trfilt_width']):
+                filt_ind = i_sw + (self.attributes['trfilt_width']-i_fw-1) * self.attributes['stride_width']
+                for i_nf in range(n_filts):
+                    for i_nc in range(n_chan):
+                        if filt_ind < kern_width:
+                            new_weights[i_sw][i_nf][i_fw][i_nc] = \
+                                data[i_nf][filt_ind][i_nc]
+        data = new_weights
+
         self.add_output_variable(shape, dims)
-        self.add_weights(quantizer = self.get_attr('weight_quantizer'))
+        # self.add_weights(quantizer = self.get_attr('weight_quantizer'), keep_dims=1)
+        self.add_weights_variable(name='weight', var_name='w{index}', \
+            data=data, quantizer=self.get_attr('weight_quantizer'), keep_dims=1)
         self.add_bias(quantizer = self.get_attr('bias_quantizer'))
 
 
diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py
@@ -224,11 +224,12 @@ def definition_cpp(self, name_suffix='', as_reference=False):
         return None
 
 class WeightVariable(Variable):
-    def __init__(self, var_name, type_name, precision, data, quantizer=None, **kwargs):
+    def __init__(self, var_name, type_name, precision, data, quantizer=None, keep_dims=0, **kwargs):
         super(WeightVariable, self).__init__(var_name, NamedType(type_name, precision, **kwargs), **kwargs)
         self.data = data
         self.nzeros = -1
         self.shape = list(self.data.shape)
+        print("Weight Variable shape object creation:", self.shape)
         self.data_length = np.prod(self.data.shape)
         self.nonzeros = np.count_nonzero(self.data)
         self.nzeros = self.data_length - self.nonzeros
@@ -237,6 +238,7 @@ def __init__(self, var_name, type_name, precision, data, quantizer=None, **kwarg
         self._iterator = None
         self.update_precision(precision)
         self.quantizer = quantizer
+        self.keep_dims = keep_dims
 
     def __iter__(self):
         self._iterator = np.nditer(self.data, order='C')
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv1dtranspose.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv1dtranspose.h
@@ -34,7 +34,9 @@ template<class data_T, class res_T, typename CONFIG_T>
 void conv_1d_transpose_cl(
     data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
     res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],
-    typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::weight_t weights[CONFIG_T::stride_width][
+        CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
+    ],
     typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt]
 )
 {
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv1dtranspose_resource.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv1dtranspose_resource.h
@@ -10,11 +10,12 @@ template<class data_T, class res_T, typename CONFIG_T>
 void conv_1d_transpose_resource_cl(
     data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
     res_T res[CONFIG_T::out_width  * CONFIG_T::n_filt],
-    typename CONFIG_T::weight_t weights[CONFIG_T::n_filt * CONFIG_T::filt_width * CONFIG_T::n_chan],
+    typename CONFIG_T::weight_t weights[CONFIG_T::stride_width][
+        CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
+    ],
     typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt]
 )
 {
-
     constexpr unsigned mult_n_in = CONFIG_T::trfilt_width * CONFIG_T::n_chan;
     constexpr unsigned mult_n_out = CONFIG_T::n_filt;
     constexpr unsigned block_factor = DIV_ROUNDUP(mult_n_in * mult_n_out, CONFIG_T::reuse_factor);
@@ -30,41 +31,7 @@ void conv_1d_transpose_resource_cl(
     typename CONFIG_T::accum_t acc[CONFIG_T::n_pixels][mult_n_out][CONFIG_T::stride_width];
     #pragma HLS ARRAY_PARTITION variable=acc complete dim=0
 
-    typename CONFIG_T::weight_t trfilt_weights[
-        CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
-    ][CONFIG_T::stride_width];
-
-    for (unsigned i_sw = 0; i_sw < CONFIG_T::stride_width; i_sw++) {
-        #pragma HLS UNROLL
-
-        for (unsigned i_fw = 0; i_fw < CONFIG_T::trfilt_width; i_fw++) {
-            #pragma HLS UNROLL
-
-            unsigned filt_ind = i_sw + (CONFIG_T::trfilt_width-i_fw-1)*CONFIG_T::stride_width;
-            for (unsigned i_nf = 0; i_nf < CONFIG_T::n_filt; i_nf++) {
-                #pragma HLS UNROLL
-
-                for (unsigned i_nc = 0; i_nc < CONFIG_T::n_chan; i_nc++) {
-                    #pragma HLS UNROLL
-                    
-                    if (filt_ind < CONFIG_T::filt_width) {
-                        trfilt_weights[
-                            i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_fw  * CONFIG_T::n_chan + i_nc
-                        ][i_sw] = weights[
-                            i_nf * CONFIG_T::n_chan * CONFIG_T::filt_width + filt_ind * CONFIG_T::n_chan + i_nc
-                        ];
-                    }
-                    else {
-                        trfilt_weights[
-                            i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_fw * CONFIG_T::n_chan + i_nc
-                        ][i_sw] = 0;
-                    }
-                }
-            }
-        }
-    }
-
-    #pragma HLS ARRAY_RESHAPE variable=trfilt_weights block factor=block_factor dim=1
+    #pragma HLS ARRAY_RESHAPE variable=weights block factor=block_factor dim=2
 
     PartitionLoop:
     for (unsigned i_part = 0; i_part < CONFIG_T::n_partitions; i_part++) {
@@ -109,7 +76,7 @@ void conv_1d_transpose_resource_cl(
 
                         acc[i_pxl][i_out][i_sw] += static_cast<typename CONFIG_T::accum_t>(
                             CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(
-                                data_buf[i_pxl][i_in], trfilt_weights[i_w][i_sw]
+                                data_buf[i_pxl][i_in], weights[i_sw][i_w]
                             )
                         );
                     }
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_conv1dtranspose_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_conv1dtranspose_stream.h
@@ -7,49 +7,6 @@
 
 namespace nnet {
 
-template <typename CONFIG_T>
-void load_trfilt_weights_1d(
-    typename CONFIG_T::weight_t trfilt_weights[CONFIG_T::stride_width][
-        CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
-    ],
-    typename CONFIG_T::weight_t weights[
-        CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt
-    ]
-)
-{
-    #pragma HLS INLINE
-
-    for (unsigned i_sw = 0; i_sw < CONFIG_T::stride_width; i_sw++) {
-        #pragma HLS UNROLL
-
-        for (unsigned i_fw = 0; i_fw < CONFIG_T::trfilt_width; i_fw++) {
-            #pragma HLS UNROLL
-
-            unsigned filt_ind = i_sw + (CONFIG_T::trfilt_width-i_fw-1)*CONFIG_T::stride_width;
-            for (unsigned i_nf = 0; i_nf < CONFIG_T::n_filt; i_nf++) {
-                #pragma HLS UNROLL
-
-                for (unsigned i_nc = 0; i_nc < CONFIG_T::n_chan; i_nc++) {
-                    #pragma HLS UNROLL
-                    
-                    if (filt_ind < CONFIG_T::filt_width) {
-                        trfilt_weights[i_sw][
-                            i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_fw  * CONFIG_T::n_chan + i_nc
-                        ] = weights[
-                            i_nf * CONFIG_T::n_chan * CONFIG_T::filt_width + filt_ind * CONFIG_T::n_chan + i_nc
-                        ];
-                    }
-                    else {
-                        trfilt_weights[i_sw][
-                            i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_fw * CONFIG_T::n_chan + i_nc
-                        ] = 0;
-                    }
-                }
-            }
-        }
-    }
-}
-
 template <class data_T, typename CONFIG_T>
 void kernel_shift_tr_1d(
     const data_T& in_elem,
@@ -81,7 +38,9 @@ template<class data_T, class res_T, typename CONFIG_T>
 void compute_output_buffer_tr_1d(
     const data_T& in_elem,
     hls::stream<res_T> &res_stream,
-    typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::weight_t weights[CONFIG_T::stride_width][
+        CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
+    ],
     typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]
 ) 
 {
@@ -106,23 +65,17 @@ void compute_output_buffer_tr_1d(
     // Add pixel to buffer
     nnet::kernel_shift_tr_1d<data_T, CONFIG_T>(in_elem, kernel_data);
 
-    static typename CONFIG_T::weight_t trfilt_weights[CONFIG_T::stride_width][
-        CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
-    ];
-
-    load_trfilt_weights_1d<CONFIG_T>(trfilt_weights, weights);
-
     //always do stride number of multiplications
     StrideLoop: for (int idx = 0; idx < CONFIG_T::stride_width; idx++) {
         #pragma HLS UNROLL
         #pragma HLS INLINE region
         // Dense multiply
         if (CONFIG_T::strategy == nnet::latency) {
             dense_latency<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(
-                kernel_data, res_out, trfilt_weights[idx], biases);
+                kernel_data, res_out, weights[idx], biases);
         } else {
             dense_resource<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(
-                kernel_data, res_out, trfilt_weights[idx], biases);
+                kernel_data, res_out, weights[idx], biases);
         }
 
         // Pack output
@@ -135,7 +88,6 @@ void compute_output_buffer_tr_1d(
         }
         // Write output to stream when output ready
         oX++;
-        // weight_start++;
     }
 
     // static var housekeeping
@@ -152,7 +104,9 @@ template<class data_T, class res_T, typename CONFIG_T>
 void conv_1d_transpose_buffer_cl(
     hls::stream<data_T> &data,
     hls::stream<res_T>  &res,
-    typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::weight_t weights[CONFIG_T::stride_width][
+        CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
+    ],
     typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt])
 {
     ReadInputWidth: for (unsigned i_iw = 0; i_iw < CONFIG_T::in_width; i_iw++) {
@@ -168,7 +122,9 @@ template<class data_T, class res_T, typename CONFIG_T>
 void conv_1d_transpose_cl(
     hls::stream<data_T> &data,
     hls::stream<res_T> &res,
-    typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
+    typename CONFIG_T::weight_t weights[CONFIG_T::stride_width][
+        CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
+    ],
     typename CONFIG_T::bias_t   biases[CONFIG_T::n_filt]
 )
 {
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h b/hls4ml/templates/vivado/nnet_utils/nnet_helpers.h
@@ -67,6 +67,42 @@ void load_weights_from_txt(T *w, const char* fname) {
     }
 }
 
+// Reads the single comma-separated line of WEIGHTS_DIR/fname into w[DIM_1][DIM_2],
+// filling it row by row (the k-th value goes to w[k / DIM_2][k % DIM_2]).
+// DIM_1 is not deducible from the array parameter, so callers pass the template
+// arguments explicitly. Exits with status 1 if the file cannot be opened; a value
+// count that differs from DIM_1 * DIM_2 is reported on stderr.
+template<class T, size_t DIM_1, size_t DIM_2>
+void load_weights_from_txt(T w[DIM_1][DIM_2], const char* fname) {
+    std::string full_path = std::string(WEIGHTS_DIR) + "/" + std::string(fname);
+    std::ifstream infile(full_path.c_str(), std::ios::binary);
+
+    if (infile.fail()) {
+        std::cerr << "ERROR: file " << std::string(fname) << " does not exist" << std::endl;
+        exit(1);
+    }
+
+    std::string line;
+    if (std::getline(infile, line)) {
+        std::istringstream iss(line);
+        std::string token;
+        const size_t expected = DIM_1 * DIM_2;
+        size_t tot = 0;
+        while(std::getline(iss, token, ',')) {
+            // Surplus values are counted but never stored, so an over-long file cannot write past w.
+            if (tot < expected) {
+                std::istringstream(token) >> w[tot / DIM_2][tot % DIM_2];
+            }
+            tot++;
+        }
+
+        if (expected != tot) {
+            std::cerr << "ERROR: Expected " << expected << " values";
+            std::cerr << " but found " << tot << " values" << std::endl;
+        }
+    }
+}
+
 template<class T, size_t SIZE>
 void load_compressed_weights_from_txt(T *w, const char* fname) {
 
diff --git a/hls4ml/writer/vivado_writer.py b/hls4ml/writer/vivado_writer.py

Original file line number	Diff line number	Diff line change
`@@ -51,6 +51,7 @@ def parse_conv1dtranspose_layer(keras_layer, input_names, input_shapes, data_rea`
`51`	`51`	`layer['filt_width'] = keras_layer['config']['kernel_size'][0]`
`52`	`52`	`layer['stride_width'] = keras_layer['config']['strides'][0]`
`53`	`53`	`layer['padding'] = keras_layer['config']['padding']`
	`54`	`+ layer['trfilt_width'] = (layer['filt_width'] + layer['stride_width'] - 1)//layer['stride_width']`
`54`	`55`
`55`	`56`	`(`
`56`	`57`	`layer['out_width'],`
Original file line number	Diff line number	Diff line change
`@@ -34,7 +34,9 @@ template<class data_T, class res_T, typename CONFIG_T>`
`34`	`34`	`void conv_1d_transpose_cl(`
`35`	`35`	`data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],`
`36`	`36`	`res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],`
`37`		`- typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],`
	`37`	`+ typename CONFIG_T::weight_t weights[CONFIG_T::stride_width][`
	`38`	`+ CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan`
	`39`	`+ ],`
`38`	`40`	`typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]`
`39`	`41`	`)`
`40`	`42`	`{`