Skip to content

Commit 5e45cc2

Browse files
authored
Merge pull request #1299 from vloncar/clone6
Support cloning up to 7 times
2 parents 80b1383 + 17787e8 commit 5e45cc2

File tree

6 files changed

+576
-15
lines changed

6 files changed

+576
-15
lines changed

hls4ml/backends/fpga/passes/clone.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ def transform(self, model, node):
7979
n_outputs = len(output_map[output]) + in_output
8080
if n_outputs == 1:
8181
continue
82-
if n_outputs > 3:
82+
if n_outputs > 7:
8383
msg = f'ERROR: Cloning output {output} of {node.class_name}\
84-
({node.name}) more than 3 times not currently supported'
84+
({node.name}) more than 7 times not currently supported'
8585
raise ValueError(msg)
8686

8787
out_var = node.get_output_variable(output)

hls4ml/templates/catapult/nnet_utils/nnet_stream.h

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,100 @@ void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<
6161
}
6262
}
6363

64+
template <class data_T, class res_T, int N>
65+
void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<res_T> &res2, ac_channel<res_T> &res3,
66+
ac_channel<res_T> &res4) {
67+
#ifndef __SYNTHESIS__
68+
while (data.available(1))
69+
#endif
70+
{
71+
data_T in_data = data.read();
72+
res_T out_data;
73+
74+
ClonePack:
75+
for (int j = 0; j < data_T::size; j++) {
76+
out_data[j] = in_data[j];
77+
}
78+
79+
res1.write(out_data);
80+
res2.write(out_data);
81+
res3.write(out_data);
82+
res4.write(out_data);
83+
}
84+
}
85+
86+
template <class data_T, class res_T, int N>
87+
void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<res_T> &res2, ac_channel<res_T> &res3,
88+
ac_channel<res_T> &res4, ac_channel<res_T> &res5) {
89+
#ifndef __SYNTHESIS__
90+
while (data.available(1))
91+
#endif
92+
{
93+
data_T in_data = data.read();
94+
res_T out_data;
95+
96+
ClonePack:
97+
for (int j = 0; j < data_T::size; j++) {
98+
out_data[j] = in_data[j];
99+
}
100+
101+
res1.write(out_data);
102+
res2.write(out_data);
103+
res3.write(out_data);
104+
res4.write(out_data);
105+
res5.write(out_data);
106+
}
107+
}
108+
109+
template <class data_T, class res_T, int N>
110+
void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<res_T> &res2, ac_channel<res_T> &res3,
111+
ac_channel<res_T> &res4, ac_channel<res_T> &res5, ac_channel<res_T> &res6) {
112+
#ifndef __SYNTHESIS__
113+
while (data.available(1))
114+
#endif
115+
{
116+
data_T in_data = data.read();
117+
res_T out_data;
118+
119+
ClonePack:
120+
for (int j = 0; j < data_T::size; j++) {
121+
out_data[j] = in_data[j];
122+
}
123+
124+
res1.write(out_data);
125+
res2.write(out_data);
126+
res3.write(out_data);
127+
res4.write(out_data);
128+
res5.write(out_data);
129+
res6.write(out_data);
130+
}
131+
}
132+
133+
template <class data_T, class res_T, int N>
134+
void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<res_T> &res2, ac_channel<res_T> &res3,
135+
ac_channel<res_T> &res4, ac_channel<res_T> &res5, ac_channel<res_T> &res6, ac_channel<res_T> &res7) {
136+
#ifndef __SYNTHESIS__
137+
while (data.available(1))
138+
#endif
139+
{
140+
data_T in_data = data.read();
141+
res_T out_data;
142+
143+
ClonePack:
144+
for (int j = 0; j < data_T::size; j++) {
145+
out_data[j] = in_data[j];
146+
}
147+
148+
res1.write(out_data);
149+
res2.write(out_data);
150+
res3.write(out_data);
151+
res4.write(out_data);
152+
res5.write(out_data);
153+
res6.write(out_data);
154+
res7.write(out_data);
155+
}
156+
}
157+
64158
template <class data_T, class res_T, int N> void repack_stream(ac_channel<data_T> &data, ac_channel<res_T> &res) {
65159
if (data_T::size == res_T::size) {
66160
for (int i = 0; i < N / data_T::size; i++) {

hls4ml/templates/oneapi/firmware/nnet_utils/nnet_stream.h

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,159 @@ template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, in
6262
}
6363
}
6464

65+
template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, class res4_pipe, int N> void clone_stream() {
66+
using data_T = typename ExtractPipeType<data_pipe>::value_type;
67+
using res1_T = typename ExtractPipeType<res1_pipe>::value_type;
68+
using res2_T = typename ExtractPipeType<res2_pipe>::value_type;
69+
using res3_T = typename ExtractPipeType<res3_pipe>::value_type;
70+
using res4_T = typename ExtractPipeType<res4_pipe>::value_type;
71+
constexpr auto datasize = std::tuple_size<data_T>{};
72+
CloneLoop:
73+
[[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) {
74+
data_T in_data = data_pipe::read();
75+
res1_T out_data1;
76+
res2_T out_data2;
77+
res3_T out_data3;
78+
res4_T out_data4;
79+
80+
ClonePack:
81+
#pragma unroll
82+
for (int j = 0; j < datasize; j++) {
83+
out_data1[j] = in_data[j];
84+
out_data2[j] = in_data[j];
85+
out_data3[j] = in_data[j];
86+
out_data4[j] = in_data[j];
87+
}
88+
89+
res1_pipe::write(out_data1);
90+
res2_pipe::write(out_data2);
91+
res3_pipe::write(out_data3);
92+
res4_pipe::write(out_data4);
93+
}
94+
}
95+
96+
template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, class res4_pipe, class res5_pipe, int N>
97+
void clone_stream() {
98+
using data_T = typename ExtractPipeType<data_pipe>::value_type;
99+
using res1_T = typename ExtractPipeType<res1_pipe>::value_type;
100+
using res2_T = typename ExtractPipeType<res2_pipe>::value_type;
101+
using res3_T = typename ExtractPipeType<res3_pipe>::value_type;
102+
using res4_T = typename ExtractPipeType<res4_pipe>::value_type;
103+
using res5_T = typename ExtractPipeType<res5_pipe>::value_type;
104+
constexpr auto datasize = std::tuple_size<data_T>{};
105+
CloneLoop:
106+
[[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) {
107+
data_T in_data = data_pipe::read();
108+
res1_T out_data1;
109+
res2_T out_data2;
110+
res3_T out_data3;
111+
res4_T out_data4;
112+
res5_T out_data5;
113+
114+
ClonePack:
115+
#pragma unroll
116+
for (int j = 0; j < datasize; j++) {
117+
out_data1[j] = in_data[j];
118+
out_data2[j] = in_data[j];
119+
out_data3[j] = in_data[j];
120+
out_data4[j] = in_data[j];
121+
out_data5[j] = in_data[j];
122+
}
123+
124+
res1_pipe::write(out_data1);
125+
res2_pipe::write(out_data2);
126+
res3_pipe::write(out_data3);
127+
res4_pipe::write(out_data4);
128+
res5_pipe::write(out_data5);
129+
}
130+
}
131+
132+
template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, class res4_pipe, class res5_pipe,
133+
class res6_pipe, int N>
134+
void clone_stream() {
135+
using data_T = typename ExtractPipeType<data_pipe>::value_type;
136+
using res1_T = typename ExtractPipeType<res1_pipe>::value_type;
137+
using res2_T = typename ExtractPipeType<res2_pipe>::value_type;
138+
using res3_T = typename ExtractPipeType<res3_pipe>::value_type;
139+
using res4_T = typename ExtractPipeType<res4_pipe>::value_type;
140+
using res5_T = typename ExtractPipeType<res5_pipe>::value_type;
141+
using res6_T = typename ExtractPipeType<res6_pipe>::value_type;
142+
constexpr auto datasize = std::tuple_size<data_T>{};
143+
CloneLoop:
144+
[[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) {
145+
data_T in_data = data_pipe::read();
146+
res1_T out_data1;
147+
res2_T out_data2;
148+
res3_T out_data3;
149+
res4_T out_data4;
150+
res5_T out_data5;
151+
res6_T out_data6;
152+
153+
ClonePack:
154+
#pragma unroll
155+
for (int j = 0; j < datasize; j++) {
156+
out_data1[j] = in_data[j];
157+
out_data2[j] = in_data[j];
158+
out_data3[j] = in_data[j];
159+
out_data4[j] = in_data[j];
160+
out_data5[j] = in_data[j];
161+
out_data6[j] = in_data[j];
162+
}
163+
164+
res1_pipe::write(out_data1);
165+
res2_pipe::write(out_data2);
166+
res3_pipe::write(out_data3);
167+
res4_pipe::write(out_data4);
168+
res5_pipe::write(out_data5);
169+
res6_pipe::write(out_data6);
170+
}
171+
}
172+
173+
template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, class res4_pipe, class res5_pipe,
174+
class res6_pipe, class res7_pipe, int N>
175+
void clone_stream() {
176+
using data_T = typename ExtractPipeType<data_pipe>::value_type;
177+
using res1_T = typename ExtractPipeType<res1_pipe>::value_type;
178+
using res2_T = typename ExtractPipeType<res2_pipe>::value_type;
179+
using res3_T = typename ExtractPipeType<res3_pipe>::value_type;
180+
using res4_T = typename ExtractPipeType<res4_pipe>::value_type;
181+
using res5_T = typename ExtractPipeType<res5_pipe>::value_type;
182+
using res6_T = typename ExtractPipeType<res6_pipe>::value_type;
183+
using res7_T = typename ExtractPipeType<res7_pipe>::value_type;
184+
constexpr auto datasize = std::tuple_size<data_T>{};
185+
CloneLoop:
186+
[[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) {
187+
data_T in_data = data_pipe::read();
188+
res1_T out_data1;
189+
res2_T out_data2;
190+
res3_T out_data3;
191+
res4_T out_data4;
192+
res5_T out_data5;
193+
res6_T out_data6;
194+
res7_T out_data7;
195+
196+
ClonePack:
197+
#pragma unroll
198+
for (int j = 0; j < datasize; j++) {
199+
out_data1[j] = in_data[j];
200+
out_data2[j] = in_data[j];
201+
out_data3[j] = in_data[j];
202+
out_data4[j] = in_data[j];
203+
out_data5[j] = in_data[j];
204+
out_data6[j] = in_data[j];
205+
out_data7[j] = in_data[j];
206+
}
207+
208+
res1_pipe::write(out_data1);
209+
res2_pipe::write(out_data2);
210+
res3_pipe::write(out_data3);
211+
res4_pipe::write(out_data4);
212+
res5_pipe::write(out_data5);
213+
res6_pipe::write(out_data6);
214+
res6_pipe::write(out_data7);
215+
}
216+
}
217+
65218
template <class data_pipe, class res_pipe, int N> void repack_stream() {
66219
using data_T = typename ExtractPipeType<data_pipe>::value_type;
67220
using res_T = typename ExtractPipeType<res_pipe>::value_type;

0 commit comments

Comments
 (0)