@@ -30,33 +30,41 @@ void conv_1d_transpose_resource_cl(
30
30
typename CONFIG_T::accum_t acc[CONFIG_T::n_pixels][mult_n_out][CONFIG_T::stride_width];
31
31
#pragma HLS ARRAY_PARTITION variable=acc complete dim=0
32
32
33
- typename CONFIG_T::weight_t trfilt_weights[CONFIG_T::stride_width][
33
+ typename CONFIG_T::weight_t trfilt_weights[
34
34
CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
35
- ];
35
+ ][CONFIG_T::stride_width] ;
36
36
37
37
for (unsigned i_sw = 0 ; i_sw < CONFIG_T::stride_width; i_sw++) {
38
+ #pragma HLS UNROLL
39
+
38
40
for (unsigned i_fw = 0 ; i_fw < CONFIG_T::trfilt_width; i_fw++) {
41
+ #pragma HLS UNROLL
42
+
39
43
unsigned filt_ind = i_sw + (CONFIG_T::trfilt_width-i_fw-1 )*CONFIG_T::stride_width;
40
44
for (unsigned i_nf = 0 ; i_nf < CONFIG_T::n_filt; i_nf++) {
45
+ #pragma HLS UNROLL
46
+
41
47
for (unsigned i_nc = 0 ; i_nc < CONFIG_T::n_chan; i_nc++) {
42
- if (i_fw < CONFIG_T::filt_width) {
43
- trfilt_weights[i_sw][
48
+ #pragma HLS UNROLL
49
+
50
+ if (filt_ind < CONFIG_T::filt_width) {
51
+ trfilt_weights[
44
52
i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_fw * CONFIG_T::n_chan + i_nc
45
- ] = weights[
53
+ ][i_sw] = weights[
46
54
filt_ind * CONFIG_T::n_filt * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan + i_nc
47
55
];
48
56
}
49
57
else {
50
- trfilt_weights[i_sw][
58
+ trfilt_weights[
51
59
i_fw * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_nc
52
- ] = 0 ;
60
+ ][i_sw] = 0 ;
53
61
}
54
62
}
55
63
}
56
64
}
57
65
}
58
66
59
- #pragma HLS ARRAY_RESHAPE variable=trfilt_weights block factor=block_factor dim=2
67
+ #pragma HLS ARRAY_RESHAPE variable=trfilt_weights block factor=block_factor dim=1
60
68
61
69
PartitionLoop:
62
70
for (unsigned i_part = 0 ; i_part < CONFIG_T::n_partitions; i_part++) {
@@ -101,7 +109,7 @@ void conv_1d_transpose_resource_cl(
101
109
102
110
acc[i_pxl][i_out][i_sw] += static_cast <typename CONFIG_T::accum_t >(
103
111
CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t >::product (
104
- data_buf[i_pxl][i_in], trfilt_weights[i_sw][i_w ]
112
+ data_buf[i_pxl][i_in], trfilt_weights[i_w][i_sw ]
105
113
)
106
114
);
107
115
}
@@ -124,21 +132,25 @@ void conv_1d_transpose_resource_cl(
124
132
}
125
133
}
126
134
135
+
127
136
PixelResultLoop:
128
137
for (unsigned i_pxl = 0 ; i_pxl < CONFIG_T::n_pixels; i_pxl++) {
129
138
#pragma HLS UNROLL
130
139
131
140
StrideResultLoop:
132
141
for (unsigned i_sw = 0 ; i_sw < CONFIG_T::stride_width; i_sw++) {
133
142
#pragma HLS UNROLL
143
+
144
+ unsigned output_index = i_pxl * CONFIG_T::n_partitions * CONFIG_T::stride_width +
145
+ i_part * CONFIG_T::stride_width + i_sw;
134
146
135
- if (i_pxl * CONFIG_T::n_partitions * CONFIG_T::stride_width + i_part * CONFIG_T::stride_width + i_sw >= CONFIG_T::pad_left &&
136
- i_pxl * CONFIG_T::n_partitions * CONFIG_T::stride_width + i_part * CONFIG_T::stride_width + i_sw < CONFIG_T::out_width + CONFIG_T::pad_left) {
147
+ if (output_index >= CONFIG_T::pad_left &&
148
+ output_index < CONFIG_T::out_width + CONFIG_T::pad_left) {
137
149
ResultLoop:
138
150
for (unsigned i_res = 0 ; i_res < mult_n_out; i_res++) {
139
151
#pragma HLS UNROLL
140
152
141
- *( res++) = cast<data_T, res_T, typename CONFIG_T::mult_config>(acc[i_pxl][i_res][i_sw]);
153
+ res[output_index][i_res] = cast<data_T, res_T, typename CONFIG_T::mult_config>(acc[i_pxl][i_res][i_sw]);
142
154
}
143
155
}
144
156
}
0 commit comments