Skip to content

Commit 9693b25

Browse files
Merge branch 'vitis_accelerator_dev' into makefile_update
2 parents 754ead3 + 67eb4df commit 9693b25

File tree

7 files changed

+220
-24
lines changed

7 files changed

+220
-24
lines changed

docs/advanced/accelerator.rst

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,50 @@ The ``predict`` method will send the input data to the PL and return the output
7575
7676
nn = NeuralNetworkOverlay('hls4ml_nn.bit', X_test.shape, y_test.shape)
7777
y_hw, latency, throughput = nn.predict(X_test, profile=True)
78+
79+
========================
80+
VitisAccelerator Backend
81+
========================
82+
83+
The ``VitisAccelerator`` backend makes use of the Vitis kernel flow and streamlines the generation of an hls4ml project targeting PCIe accelerators.
84+
The ``VitisAccelerator`` backend supports the following boards:
85+
86+
* `Alveo u50 <https://www.xilinx.com/products/boards-and-kits/alveo/u50.html>`_
87+
* `Alveo u55c <https://www.xilinx.com/products/boards-and-kits/alveo/u55c.html>`_
88+
* `Alveo u250 <https://www.xilinx.com/products/boards-and-kits/alveo/u250.html>`_
89+
* `Versal vck5000 <https://www.xilinx.com/products/boards-and-kits/vck5000.html>`_
90+
91+
The backend also generates `OpenCL` host code that uploads and runs the kernel on the accelerator card.
92+
93+
Example
94+
=======
95+
96+
The following example is a modified version of `hls4ml example 7 <https://github.com/fastmachinelearning/hls4ml-tutorial/blob/master/part7_deployment.ipynb>`_.
97+
98+
.. code-block:: Python
99+
100+
import hls4ml
101+
hls_model = hls4ml.converters.convert_from_keras_model(
102+
model,
103+
hls_config=config,
104+
output_dir='model_3/hls4ml_prj_vitis_accel',
105+
backend='VitisAccelerator',
106+
board='alveo-u55c',
107+
num_kernel=4,
108+
num_thread=8,
109+
batchsize=8192
110+
)
111+
hls_model.compile()
112+
hls_model.build()
113+
114+
By default the build method generates all the necessary files to run the kernel on the accelerator board. As this can be a long process, there are three build options that target the generation of specific parts of the project:
115+
116+
* `host`: Compiles the host application
117+
* `hls`: Produces only the kernel's object file
118+
* `xclbin`: Produces only the kernel's .xclbin file
119+
120+
The generated host application can be executed with the xclbin file as its argument, as follows:
121+
122+
.. code-block:: Bash
123+
124+
./host <myproject>.xclbin

hls4ml/backends/vitis_accelerator/supported_boards.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
"memory": {"type": "ddr", "channels": 4, "capacity": 64}
1919
},
2020
"vck5000": {
21-
"board_type": "alveo-versal",
22-
"part": "xcvc1902-2msevsvd1760",
21+
"board_type": "versal",
22+
"part": "xcvc1902-vsvd1760-2MP-e-S",
2323
"platform": "xilinx_vck5000_gen4x8_qdma_2_202220_1",
2424
"memory":{"type": "ddr", "channels": 3, "capacity": 12}
2525
}

hls4ml/backends/vitis_accelerator/vitis_accelerator_backend.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def create_initial_config(
2121
io_type='io_parallel',
2222
num_kernel=1,
2323
num_thread=1,
24-
batchsize=8192
24+
batchsize=8192,
25+
vivado_directives=[]
2526
):
2627
'''
2728
Create initial accelerator config with default parameters
@@ -32,6 +33,8 @@ def create_initial_config(
3233
io_type: io_parallel or io_stream
3334
num_kernel: how many compute units to create on the fpga
3435
num_thread: how many threads the host cpu uses to drive the fpga
36+
batchsize: how many samples to process within a single buffer on the fpga
37+
vivado_directives: Directives passed down to Vivado that controls the hardware synthesis and implementation steps
3538
Returns:
3639
populated config
3740
'''
@@ -42,6 +45,7 @@ def create_initial_config(
4245
config['AcceleratorConfig']['Num_Kernel'] = num_kernel
4346
config['AcceleratorConfig']['Num_Thread'] = num_thread
4447
config['AcceleratorConfig']['Batchsize'] = batchsize
48+
config['AcceleratorConfig']['Vivado_Directives'] = vivado_directives
4549
return config
4650

4751
def build(self, model, reset=False, synth=True, vsynth=True, csim=False, cosim=False, debug=False, **kwargs):

hls4ml/backends/vitis_accelerator/vitis_accelerator_config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ def __init__(self, config):
3030

3131
self.num_kernel = accel_config.get('Num_Kernel')
3232
self.num_thread = accel_config.get('Num_Thread')
33-
self.batchsize = accel_config.get('Batchsize')
33+
self.batchsize = accel_config.get('Batchsize')
34+
35+
self.vivado_directives = accel_config.get('Vivado_Directives')
3436

3537
def get_board_type(self):
3638
return self.board_type
@@ -52,3 +54,6 @@ def get_memory_type(self):
5254

5355
def get_memory_channel_count(self):
5456
return self.memory_channel_count
57+
58+
def get_vivado_directives(self):
59+
return self.vivado_directives
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
{
2+
"impl.strategies": [
3+
"Performance_Explore",
4+
"Performance_ExplorePostRoutePhysOpt",
5+
"Performance_LBlockPlacement",
6+
"Performance_LBlockPlacementFanoutOpt",
7+
"Performance_NetDelay_high",
8+
"Performance_NetDelay_low",
9+
"Performance_Retiming",
10+
"Performance_ExtraTimingOpt",
11+
"Performance_RefinePlacement",
12+
"Performance_SpreadSLL",
13+
"Performance_BalanceSLL",
14+
"Congestion_SpreadLogic_high",
15+
"Congestion_SpreadLogic_medium",
16+
"Congestion_SpreadLogic_low",
17+
"Congestion_SpreadLogic_Explore",
18+
"Congestion_SSI_SpreadLogic_high",
19+
"Congestion_SSI_SpreadLogic_low",
20+
"Area_Explore",
21+
"Area_ExploreSequential",
22+
"Area_ExploreWithRemap",
23+
"Power_DefaultOpt",
24+
"Power_ExploreArea",
25+
"Flow_RunPhysOpt",
26+
"Flow_RunPostRoutePhysOpt",
27+
"Flow_RuntimeOptimized",
28+
"Flow_Quick",
29+
"ALL"
30+
],
31+
"prop": {
32+
"run": {
33+
"impl": {
34+
"STEPS": {
35+
"OPT_DESIGN": {
36+
"ARGS": {
37+
"DIRECTIVE": [
38+
"Explore",
39+
"ExploreArea",
40+
"ExploreSequentialArea",
41+
"RuntimeOptimized",
42+
"ExploreWithRemap"
43+
]
44+
}
45+
},
46+
"POWER_OPT_DESIGN": {
47+
"IS_ENABLED": [
48+
"true"
49+
]
50+
},
51+
"PLACE_DESIGN": {
52+
"ARGS": {
53+
"DIRECTIVE": [
54+
"Explore",
55+
"WLDrivenBlockPlacement",
56+
"EarlyBlockPlacement",
57+
"ExtraNetDelay_high",
58+
"ExtraNetDelay_low",
59+
"SSI_SpreadLogic_high",
60+
"SSI_SpreadLogic_low",
61+
"AltSpreadLogic_high",
62+
"AltSpreadLogic_medium",
63+
"AltSpreadLogic_low",
64+
"ExtraPostPlacementOpt",
65+
"ExtraTimingOpt",
66+
"SSI_SpreadSLLs",
67+
"SSI_BalanceSLLs",
68+
"SSI_Balance_SLRs",
69+
"SSI_HighUtilSLRs",
70+
"RuntimeOptimized",
71+
"Quick",
72+
"Auto_1",
73+
"Auto_2",
74+
"Auto_3"
75+
]
76+
}
77+
},
78+
"POST_PLACE_POWER_OPT_DESIGN": {
79+
"IS_ENABLED": [
80+
"true"
81+
]
82+
},
83+
"PHYS_OPT_DESIGN": {
84+
"IS_ENABLED": [
85+
"true"
86+
],
87+
"ARGS": {
88+
"DIRECTIVE": [
89+
"Explore",
90+
"ExploreWithHoldFix",
91+
"ExploreWithAggressiveHoldFix",
92+
"AggressiveExplore",
93+
"AlternateReplication",
94+
"AggressiveFanoutOpt",
95+
"AddRetime",
96+
"AlternateFlowWithRetiming",
97+
"RuntimeOptimized"
98+
]
99+
}
100+
},
101+
"ROUTE_DESIGN": {
102+
"ARGS": {
103+
"DIRECTIVE": [
104+
"Explore",
105+
"AggressiveExplore",
106+
"NoTimingRelaxation",
107+
"MoreGlobalIterations",
108+
"HigherDelayCost",
109+
"RuntimeOptimized",
110+
"AlternateCLBRouting",
111+
"Quick"
112+
]
113+
}
114+
},
115+
"POST_ROUTE_PHYS_OPT_DESIGN": {
116+
"IS_ENABLED": [
117+
"true"
118+
],
119+
"ARGS": {
120+
"DIRECTIVE": [
121+
"Explore",
122+
"ExploreWithHoldFix",
123+
"ExploreWithAggressiveHoldFix",
124+
"AggressiveExplore",
125+
"AlternateReplication",
126+
"AggressiveFanoutOpt",
127+
"AddRetime",
128+
"AlternateFlowWithRetiming",
129+
"RuntimeOptimized"
130+
]
131+
}
132+
}
133+
}
134+
}
135+
}
136+
}
137+
}

hls4ml/templates/vitis_accelerator/accelerator_card.cfg

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,8 @@ prop=kernel.kernel_wrapper.kernel_flags=-std=c++11
77

88
[hls]
99
pre_tcl=./hls_config.tcl
10+
# hls-fpga-machine-learning clock control
1011

1112
# hls-fpga-machine-learning kernel control
1213

13-
[vivado]
14-
prop=run.impl_1.STEPS.OPT_DESIGN.IS_ENABLED=true
15-
prop=run.impl_1.STEPS.OPT_DESIGN.ARGS.DIRECTIVE=Explore
16-
17-
prop=run.impl_1.STEPS.PLACE_DESIGN.ARGS.DIRECTIVE=AltSpreadLogic_high
18-
19-
prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.IS_ENABLED=true
20-
prop=run.imp1_1.STEPS.PHYS_OPT_DESIGN.ARGS.DIRECTIVE=AggressiveExplore
21-
22-
prop=run.impl_1.STEPS.ROUTE_DESIGN.ARGS.DIRECTIVE=Explore
23-
24-
prop=run.impl_1.STEPS.POST_ROUTE_PHYS_OPT_DESIGN.IS_ENABLED=true
25-
prop=run.impl_1.STEPS.POST_ROUTE_PHYS_OPT_DESIGN.ARGS.DIRECTIVE=AggressiveExplore
14+
# hls-fpga-machine-learning vivado directives

hls4ml/writer/vitis_accelerator_writer.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -221,13 +221,18 @@ def write_accelerator_card_cfg(self, model):
221221
raise Exception(format(self.vitis_accelerator_config.get_platform()) +
222222
' has only ' + format(num_channels) + ' memory banks.')
223223

224+
directives = self.vitis_accelerator_config.get_vivado_directives()
225+
224226
for line in f.readlines():
225227
if 'MYPLATFORM' in line:
226228
newline = line.replace('MYPLATFORM', format(self.vitis_accelerator_config.get_platform()))
229+
elif "# hls-fpga-machine-learning clock control" in line:
230+
freq = round(1e9 / model.config.get_config_value('ClockPeriod'))
231+
newline = 'clock={}:kernel_wrapper\n'.format(freq)
227232
elif '# hls-fpga-machine-learning kernel control' in line:
228233
newline = '[connectivity]\n'
229234
newline += 'nk=kernel_wrapper:' + format(num_kernels) + '\n\n'
230-
if self.vitis_accelerator_config.get_board_type() == "alveo":
235+
if self.vitis_accelerator_config.get_board_type() == 'alveo':
231236
if memory_type == 'hbm':
232237
for i in range(0, num_kernels):
233238
newline += 'sp=kernel_wrapper_{}.in:HBM[{}:{}]\n'.format(i + 1, (i*2)*num_channels_per_cu, ((i*2 + 1)*num_channels_per_cu) - 1)
@@ -239,18 +244,27 @@ def write_accelerator_card_cfg(self, model):
239244
newline += '\n'
240245
for i in range(0, num_kernels):
241246
newline += 'slr=kernel_wrapper_{}:SLR{}\n'.format(i + 1, i)
247+
elif '# hls-fpga-machine-learning vivado directives' in line:
248+
newline = ''
249+
if directives:
250+
newline += '[vivado]\n'
251+
for x in directives:
252+
newline += x + '\n'
242253
else:
243254
newline = line
244255
fout.write(newline)
245256
f.close()
246257
fout.close()
247258

248-
# Copy hls_config.tcl
249-
filedir = os.path.dirname(os.path.abspath(__file__))
250-
srcpath = os.path.join(filedir, '../templates/vitis_accelerator/hls_config.tcl')
251-
dstpath = f'{model.config.get_output_dir()}/hls_config.tcl'
252-
copy(srcpath, dstpath)
253-
259+
# Write hls_config.tcl
260+
tcl_f = open(os.path.join(filedir, '../templates/vitis_accelerator/hls_config.tcl'))
261+
tcl_fout = open(f'{model.config.get_output_dir()}/hls_config.tcl', 'w')
262+
for line in tcl_f.readlines():
263+
newline = line
264+
tcl_fout.write(newline)
265+
tcl_fout.write('\nset_clock_uncertainty {}\n'.format(model.config.get_config_value('ClockUncertainty', '12.5%')))
266+
tcl_f.close()
267+
tcl_fout.close()
254268

255269
def write_nnet_utils_overrides(self, model):
256270
"""Override nnet_types.h pointer comparison

0 commit comments

Comments
 (0)