Skip to content

Commit 631878c

Browse files
committed
Add pipeline directive to the merging functions (and a test)
1 parent dad0d63 commit 631878c

File tree

2 files changed

+166
-0
lines changed

2 files changed

+166
-0
lines changed

hls4ml/templates/vivado/nnet_utils/nnet_merge.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ void add(
5959
input2_T data2[CONFIG_T::n_elem],
6060
res_T res[CONFIG_T::n_elem])
6161
{
62+
#pragma HLS PIPELINE
63+
6264
for (int ii=0; ii<CONFIG_T::n_elem; ii++) {
6365
res[ii] = data1[ii] + data2[ii];
6466
}
@@ -71,6 +73,8 @@ void subtract(
7173
input2_T data2[CONFIG_T::n_elem],
7274
res_T res[CONFIG_T::n_elem])
7375
{
76+
#pragma HLS PIPELINE
77+
7478
for (int ii=0; ii<CONFIG_T::n_elem; ii++) {
7579
res[ii] = data1[ii] - data2[ii];
7680
}
@@ -82,6 +86,8 @@ void multiply(
8286
input2_T data2[CONFIG_T::n_elem],
8387
res_T res[CONFIG_T::n_elem])
8488
{
89+
#pragma HLS PIPELINE
90+
8591
for (int ii=0; ii<CONFIG_T::n_elem; ii++) {
8692
res[ii] = data1[ii] * data2[ii];
8793
}
@@ -93,6 +99,8 @@ void average(
9399
input2_T data2[CONFIG_T::n_elem],
94100
res_T res[CONFIG_T::n_elem])
95101
{
102+
#pragma HLS PIPELINE
103+
96104
for (int ii=0; ii<CONFIG_T::n_elem; ii++) {
97105
res[ii] = (data1[ii] + data2[ii]) / (res_T) 2;
98106
}
@@ -104,6 +112,8 @@ void maximum(
104112
input2_T data2[CONFIG_T::n_elem],
105113
res_T res[CONFIG_T::n_elem])
106114
{
115+
#pragma HLS PIPELINE
116+
107117
for (int ii=0; ii<CONFIG_T::n_elem; ii++) {
108118
res[ii] = (data1[ii] > data2[ii]) ? data1[ii] : data2[ii];
109119
}
@@ -115,6 +125,8 @@ void minimum(
115125
input2_T data2[CONFIG_T::n_elem],
116126
res_T res[CONFIG_T::n_elem])
117127
{
128+
#pragma HLS PIPELINE
129+
118130
for (int ii=0; ii<CONFIG_T::n_elem; ii++) {
119131
res[ii] = (data1[ii] < data2[ii]) ? data1[ii] : data2[ii];
120132
}
@@ -155,6 +167,8 @@ void concatenate1d(
155167
input2_T data2[CONFIG_T::n_elem2_0],
156168
res_T res[CONFIG_T::n_elem1_0 + CONFIG_T::n_elem2_0])
157169
{
170+
#pragma HLS PIPELINE
171+
158172
for (int ii=0; ii<CONFIG_T::n_elem1_0; ii++) {
159173
res[ii] = data1[ii];
160174
}
@@ -169,6 +183,8 @@ void concatenate2d_0(
169183
input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1],
170184
res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1])
171185
{
186+
#pragma HLS PIPELINE
187+
172188
for (int ii=0; ii<CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1; ii++) {
173189
res[ii] = data1[ii];
174190
}
@@ -183,6 +199,8 @@ void concatenate2d_1(
183199
input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1],
184200
res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1])
185201
{
202+
#pragma HLS PIPELINE
203+
186204
for (int ii=0; ii<CONFIG_T::n_elem1_0; ii++) {
187205
for (int jj=0; jj<CONFIG_T::n_elem1_1; jj++) {
188206
res[ii * (CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_1) + jj] = data1[ii * CONFIG_T::n_elem1_1 + jj];
@@ -199,6 +217,8 @@ void concatenate2d(
199217
input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1],
200218
res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1])
201219
{
220+
#pragma HLS INLINE
221+
202222
if (CONFIG_T::axis == 2 || CONFIG_T::axis == -1) {
203223
concatenate2d_1<input1_T, input2_T, res_T, CONFIG_T>(data1, data2, res);
204224
} else {
@@ -212,6 +232,8 @@ input1_T data1[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2],
212232
input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2],
213233
res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2])
214234
{
235+
#pragma HLS PIPELINE
236+
215237
for (int ii=0; ii<CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2; ii++) {
216238
res[ii] = data1[ii];
217239
}
@@ -226,6 +248,8 @@ input1_T data1[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2],
226248
input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2],
227249
res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2])
228250
{
251+
#pragma HLS PIPELINE
252+
229253
for (int ii=0; ii<CONFIG_T::n_elem1_0; ii++) {
230254
for (int jj=0; jj<CONFIG_T::n_elem1_1; jj++) {
231255
for (int kk=0; kk<CONFIG_T::n_elem1_2; kk++) {
@@ -258,6 +282,8 @@ void concatenate3d_2(
258282
input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2],
259283
res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2])
260284
{
285+
#pragma HLS PIPELINE
286+
261287
for (int ii=0; ii<CONFIG_T::n_elem1_0; ii++) {
262288
for (int jj=0; jj<CONFIG_T::n_elem1_1; jj++) {
263289
for (int kk=0; kk<CONFIG_T::n_elem1_2; kk++) {
@@ -288,6 +314,8 @@ void concatenate3d(
288314
input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2],
289315
res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2])
290316
{
317+
#pragma HLS INLINE
318+
291319
if (CONFIG_T::axis == 3 || CONFIG_T::axis == -1) {
292320
concatenate3d_2<input1_T, input2_T, res_T, CONFIG_T>(data1, data2, res);
293321
} else if (CONFIG_T::axis == 2 || CONFIG_T::axis == -2) {

test/pytest/test_merge.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import pytest
2+
import hls4ml
3+
import tensorflow as tf
4+
import numpy as np
5+
from pathlib import Path
6+
from tensorflow.keras.layers import Input, Add, Average, Concatenate, Dot, Maximum, Minimum, Multiply, Subtract
7+
8+
# Directory that contains this test module; the tests below place their
# generated hls4ml project directories underneath it.
test_root_path = Path(__file__).parent
9+
10+
# Keras merge layer classes that test_merge is parametrized over.
merge_layer = [Add, Average, Maximum, Minimum, Multiply, Subtract]
11+
# hls4ml io_type values that test_merge is parametrized over.
io_type_options = ['io_parallel', 'io_stream']
12+
@pytest.mark.parametrize('merge_layer', merge_layer)
@pytest.mark.parametrize('io_type', io_type_options)
def test_merge(merge_layer, io_type):
    """Compare a two-input Keras merge layer with its hls4ml conversion.

    A Keras model made of two inputs and a single ``merge_layer`` instance is
    converted with hls4ml using the given ``io_type``. Both models are then
    evaluated on the same random inputs and the predictions must agree
    within an absolute tolerance of 0.001.
    """
    input_shape = (10, 10, 3)

    # Two inputs of identical shape feed the merge layer under test.
    keras_inputs = [Input(shape=input_shape), Input(shape=input_shape)]
    merged = merge_layer()(keras_inputs)

    model = tf.keras.models.Model(inputs=keras_inputs, outputs=merged)
    model.compile(optimizer='adam', loss='mse')

    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
    project_name = 'hls4mlprj_merge_{}_{}'.format(merge_layer.__name__.lower(), io_type)
    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        output_dir=str(test_root_path / project_name),
        io_type=io_type,
    )
    hls_model.compile()

    # One batch of 100 random samples per model input.
    samples = [
        np.random.rand(100, *input_shape),
        np.random.rand(100, *input_shape),
    ]

    expected = model.predict(samples)
    # Reshape the hls4ml output to the Keras output shape before comparing.
    actual = hls_model.predict(samples).reshape(expected.shape)

    np.testing.assert_allclose(actual, expected, rtol=0, atol=0.001)
36+
37+
38+
@pytest.mark.parametrize('axes', [1])
@pytest.mark.parametrize('io_type', ['io_parallel'])  # No io_stream implementation yet
def test_dot(axes, io_type):
    """Compare a Keras ``Dot`` layer with its hls4ml conversion.

    A two-input Keras model with a single ``Dot(axes=axes)`` layer is
    converted with hls4ml. Both models are evaluated on the same random
    inputs and the predictions must agree within an absolute tolerance of
    0.001.
    """
    input_shape = (10,)  # Only 1D implemented

    keras_inputs = [Input(shape=input_shape), Input(shape=input_shape)]
    dot_product = Dot(axes=axes)(keras_inputs)

    model = tf.keras.models.Model(inputs=keras_inputs, outputs=dot_product)
    model.compile(optimizer='adam', loss='mse')

    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
    project_name = 'hls4mlprj_dot_axes_{}_{}'.format(str(axes), io_type)
    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        output_dir=str(test_root_path / project_name),
        io_type=io_type,
    )
    hls_model.compile()

    # One batch of 100 random samples per model input.
    samples = [
        np.random.rand(100, *input_shape),
        np.random.rand(100, *input_shape),
    ]

    expected = model.predict(samples)
    # Reshape the hls4ml output to the Keras output shape before comparing.
    actual = hls_model.predict(samples).reshape(expected.shape)

    np.testing.assert_allclose(actual, expected, rtol=0, atol=0.001)
62+
63+
64+
@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream'])
def test_concatenate1d(io_type):
    """Compare a Keras ``Concatenate`` of two 1D inputs with its hls4ml conversion.

    A two-input Keras model with a default-constructed ``Concatenate`` layer
    is converted with hls4ml using the given ``io_type``. Both models are
    evaluated on the same random inputs and the predictions must agree
    within an absolute tolerance of 0.001.
    """
    input_shape = (10,)

    keras_inputs = [Input(shape=input_shape), Input(shape=input_shape)]
    joined = Concatenate()(keras_inputs)

    model = tf.keras.models.Model(inputs=keras_inputs, outputs=joined)
    model.compile(optimizer='adam', loss='mse')

    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
    project_name = 'hls4mlprj_concatenate1d_{}'.format(io_type)
    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        output_dir=str(test_root_path / project_name),
        io_type=io_type,
    )
    hls_model.compile()

    # One batch of 100 random samples per model input.
    samples = [
        np.random.rand(100, *input_shape),
        np.random.rand(100, *input_shape),
    ]

    expected = model.predict(samples)
    # Reshape the hls4ml output to the Keras output shape before comparing.
    actual = hls_model.predict(samples).reshape(expected.shape)

    np.testing.assert_allclose(actual, expected, rtol=0, atol=0.001)
87+
88+
89+
@pytest.mark.parametrize('axis', [1, 2])
@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream'])
def test_concatenate2d(axis, io_type):
    """Compare a Keras ``Concatenate`` of two 2D inputs with its hls4ml conversion.

    A two-input Keras model with a ``Concatenate(axis=axis)`` layer is
    converted with hls4ml using the given ``io_type``. Both models are
    evaluated on the same random inputs and the predictions must agree
    within an absolute tolerance of 0.001.
    """
    input_shape = (10, 3)

    keras_inputs = [Input(shape=input_shape), Input(shape=input_shape)]
    joined = Concatenate(axis=axis)(keras_inputs)

    model = tf.keras.models.Model(inputs=keras_inputs, outputs=joined)
    model.compile(optimizer='adam', loss='mse')

    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
    project_name = 'hls4mlprj_concatenate2d_axis_{}_{}'.format(str(axis), io_type)
    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        output_dir=str(test_root_path / project_name),
        io_type=io_type,
    )
    hls_model.compile()

    # One batch of 100 random samples per model input.
    samples = [
        np.random.rand(100, *input_shape),
        np.random.rand(100, *input_shape),
    ]

    expected = model.predict(samples)
    # Reshape the hls4ml output to the Keras output shape before comparing.
    actual = hls_model.predict(samples).reshape(expected.shape)

    np.testing.assert_allclose(actual, expected, rtol=0, atol=0.001)
113+
114+
115+
@pytest.mark.parametrize('axis', [1, 2, 3])
@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream'])
def test_concatenate3d(axis, io_type):
    """Compare a Keras ``Concatenate`` of two 3D inputs with its hls4ml conversion.

    A two-input Keras model with a ``Concatenate(axis=axis)`` layer is
    converted with hls4ml using the given ``io_type``. Both models are
    evaluated on the same random inputs and the predictions must agree
    within an absolute tolerance of 0.001.
    """
    input_shape = (10, 10, 3)

    keras_inputs = [Input(shape=input_shape), Input(shape=input_shape)]
    joined = Concatenate(axis=axis)(keras_inputs)

    model = tf.keras.models.Model(inputs=keras_inputs, outputs=joined)
    model.compile(optimizer='adam', loss='mse')

    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
    project_name = 'hls4mlprj_concatenate3d_axis_{}_{}'.format(str(axis), io_type)
    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        output_dir=str(test_root_path / project_name),
        io_type=io_type,
    )
    hls_model.compile()

    # One batch of 100 random samples per model input.
    samples = [
        np.random.rand(100, *input_shape),
        np.random.rand(100, *input_shape),
    ]

    expected = model.predict(samples)
    # Reshape the hls4ml output to the Keras output shape before comparing.
    actual = hls_model.predict(samples).reshape(expected.shape)

    np.testing.assert_allclose(actual, expected, rtol=0, atol=0.001)

0 commit comments

Comments
 (0)