Skip to content

Commit d519e3a

Browse files
initial version
1 parent bae3855 commit d519e3a

18 files changed

+1325
-596
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"alveo-u55c": {
3+
"part": "xcu55c-fsvh2892-2L-e",
4+
"platform": "xilinx_u55c_gen3x16_xdma_3_202210_1",
5+
"memory": {"type": "hbm", "channels": 32}
6+
},
7+
"alveo-u250": {
8+
"part": "xcu250-figd2104-2L-e",
9+
"platform": "xilinx_u250_xdma_201830_2",
10+
"memory": {"type": "ddr", "channels": 4}
11+
}
12+
}

hls4ml/backends/vitis_accelerator/vitis_accelerator_backend.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,37 @@ def __init__(self):
1313
self._register_layer_attributes()
1414
self._register_flows()
1515

16+
def create_initial_config(
    self,
    board='alveo-u55c',
    part=None,
    clock_period=5,
    io_type='io_parallel',
    num_kernel=1,
    num_thread=1,
    batchsize=8192
):
    '''
    Create initial accelerator config with default parameters

    Args:
        board: one of the keys defined in supported_boards.json
        part: FPGA part to target; when None the part is later derived from
            the board entry (see VitisAcceleratorConfig)
        clock_period: clock period passed to hls project
        io_type: io_parallel or io_stream
        num_kernel: how many compute units to create on the fpga
        num_thread: how many threads the host cpu uses to drive the fpga
        batchsize: number of samples the host buffers per kernel invocation
    Returns:
        populated config
    '''
    # Guard against callers explicitly passing board=None.
    board = board if board is not None else 'alveo-u55c'
    config = super().create_initial_config(part, clock_period, io_type)
    # Accelerator-specific settings live under their own section so the
    # base config keys stay untouched.
    config['AcceleratorConfig'] = {}
    config['AcceleratorConfig']['Board'] = board
    config['AcceleratorConfig']['Num_Kernel'] = num_kernel
    config['AcceleratorConfig']['Num_Thread'] = num_thread
    config['AcceleratorConfig']['Batchsize'] = batchsize
    return config
46+
1647
def _register_flows(self):
1748
validation_passes = [
1849
'vitisaccelerator:validate_conv_implementation',

hls4ml/backends/vitis_accelerator/vitis_accelerator_config.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,48 @@
99
class VitisAcceleratorConfig:
    '''Accessor for the AcceleratorConfig section of an hls4ml config.

    Resolves the target board against supported_boards.json and exposes the
    platform/memory/kernel settings the Vitis Accelerator writer needs.
    '''

    def __init__(self, config):
        '''
        Args:
            config: hls4ml config wrapper; its ``config`` dict must contain
                an ``AcceleratorConfig`` section.

        Raises:
            Exception: if AcceleratorConfig is missing, or the requested
                board is not listed in supported_boards.json.
        '''
        self.config = config.config
        accel_config = self.config.get('AcceleratorConfig', None)
        if accel_config is None:
            raise Exception('Missing AcceleratorConfig')

        self.board = accel_config.get('Board', 'alveo-u55c')
        # Use a context manager so the JSON file handle is closed promptly
        # (the previous json.load(open(...)) leaked the handle), and
        # os.path.join for portable path construction.
        boards_path = os.path.join(os.path.dirname(__file__), 'supported_boards.json')
        with open(boards_path) as boards_file:
            self.supported_boards = json.load(boards_file)
        if self.board in self.supported_boards:
            board_info = self.supported_boards[self.board]
            self.part = board_info['part']
            # Only Alveo entries carry platform/memory metadata in
            # supported_boards.json.
            if self.board.startswith('alveo'):
                self.platform = board_info['platform']
                self.memory_type = board_info['memory']['type']
                self.memory_channel_count = board_info['memory']['channels']
        else:
            raise Exception('The board does not appear in supported_boards.json file')

        # The board determines the part; override any user-supplied Part
        # that disagrees with it.
        if self.config.get('Part') is not None:
            if self.config.get('Part') != self.part:
                print(
                    'WARNING: You set a Part that does not correspond to the Board you specified. '
                    'The correct Part is now set.'
                )
                self.config['Part'] = self.part

        self.num_kernel = accel_config.get('Num_Kernel')
        self.num_thread = accel_config.get('Num_Thread')
        self.batchsize = accel_config.get('Batchsize')

    def get_platform(self):
        return self.platform

    def get_num_thread(self):
        return self.num_thread

    def get_num_kernel(self):
        return self.num_kernel

    def get_batchsize(self):
        return self.batchsize

    def get_memory_type(self):
        return self.memory_type

    def get_memory_channel_count(self):
        return self.memory_channel_count
Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,56 @@
1-
.PHONY: clean
2-
3-
PLATFORM=myplatform
1+
# Absolute path to top directory of Git repository
PWD = $(shell readlink -f .)

# Checks for XILINX_VITIS
ifndef XILINX_VITIS
$(error XILINX_VITIS variable is not set, please set correctly and rerun)
endif

# Checks for XILINX_XRT
ifndef XILINX_XRT
$(error XILINX_XRT variable is not set, please set correctly and rerun)
endif

# Checks for XILINX_VIVADO
ifndef XILINX_VIVADO
$(error XILINX_VIVADO variable is not set, please set correctly and rerun)
endif

# Checks for g++; fall back to the compiler shipped with Vivado when the
# system g++ is older than 5.
ifneq ($(shell expr $(shell g++ -dumpversion) \>= 5), 1)
CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-6.2.0/bin/g++
$(warning [WARNING]: g++ version older. Using g++ provided by the tool : $(CXX))
endif

KERN_LIBRARIES += -I./ -I./firmware/ -I./firmware/weights -I./firmware/nnet_utils/

.PHONY: all
all: host kernel

# Building kernel
./build/myproject_kernel.xo: kernel_wrapper.cpp
	mkdir -p ./build
	v++ -c -t hw --config ./u55c.cfg kernel_wrapper.cpp firmware/myproject.cpp -o $@ $(KERN_LIBRARIES)

# BUGFIX: this recipe previously wrote kernel_wrapper.xclbin while the target
# was myproject_kernel.xclbin, so the target file was never produced and the
# (hours-long) link step re-ran on every build. Using $@/$< keeps the output
# and the declared target in sync.
myproject_kernel.xclbin: ./build/myproject_kernel.xo
	v++ -l -t hw --config ./u55c.cfg $< -o $@

# Building Host
INCLUDES += -I$(XILINX_XRT)/include/ -I$(XILINX_VIVADO)/include/ -I$(XILINX_HLS)/include/ \
	-I$(PWD)/libs/ -I$(PWD)/firmware/ -I$(PWD)/firmware/nnet_utils/
CXXFLAGS += -Wall -std=c++11 -Wno-unknown-pragmas -g -O0
LDFLAGS = -L$(XILINX_XRT)/lib/ -lstdc++ -lpthread -lrt -lOpenCL

host: myproject_host_cl.cpp libs/xcl2.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(INCLUDES) $(LDFLAGS)

.PHONY: kernel
kernel: myproject_kernel.xclbin

# Cleaning stuff
.PHONY: clean
clean:
	-rm -rf host libs/*.o
	-rm -rf *.xclbin*
	-rm -rf build*
	-rm -rf *.log *.jou *.rpt *.csv *.mdb *.ltx
Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,29 @@
1-
platform=myplatform
1+
kernel=kernel_wrapper
log_dir=build/logs
messageDb=build/kernel_wrapper.mdb
platform=MYPLATFORM
report_dir=build/reports
save-temps=1
temp_dir=build

[advanced]
prop=kernel.kernel_wrapper.kernel_flags=-std=c++11

[hls]
pre_tcl=./hls_config.tcl

# hls-fpga-machine-learning kernel control

[vivado]
prop=run.impl_1.STEPS.OPT_DESIGN.IS_ENABLED=true
prop=run.impl_1.STEPS.OPT_DESIGN.ARGS.DIRECTIVE=Explore

prop=run.impl_1.STEPS.PLACE_DESIGN.ARGS.DIRECTIVE=AltSpreadLogic_high

prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.IS_ENABLED=true
# BUGFIX: run name was misspelled "imp1_1" (digit one), so this directive was
# silently ignored; every other prop in this section targets run.impl_1.
prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.ARGS.DIRECTIVE=AggressiveExplore

prop=run.impl_1.STEPS.ROUTE_DESIGN.ARGS.DIRECTIVE=Explore

prop=run.impl_1.STEPS.POST_ROUTE_PHYS_OPT_DESIGN.IS_ENABLED=true
prop=run.impl_1.STEPS.POST_ROUTE_PHYS_OPT_DESIGN.ARGS.DIRECTIVE=AggressiveExplore
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Sourced via pre_tcl from the v++ [hls] config section. Lets HLS create
# m_axi interface ports automatically up to the maximum, so the kernel
# wrapper's pointer arguments each get an AXI master without manual pragmas.
config_interface -m_axi_auto_max_ports=true
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#ifndef KERNEL_WRAPPER_H
#define KERNEL_WRAPPER_H

// Shared declarations for the HLS accelerator kernel wrapper.
#include "firmware/defines.h"

// The markers below are placeholders: the hls4ml writer replaces them with
// the generated accelerator parameters (e.g. BATCHSIZE, DATA_SIZE_IN/OUT)
// and the in_buffer_t / out_buffer_t io typedefs.

// hls-fpga-machine-learning accelerator parameters

// hls-fpga-machine-learning accelerator io

#endif
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#include "kernel_wrapper.h"
2+
#include "firmware/myproject.h"
3+
4+
// Copy BATCHSIZE flattened samples from the input pointer into the on-chip
// 2-D buffer, one sample per pipelined iteration, with the inner per-element
// copy fully unrolled.
static void read_input(const in_buffer_t *in, in_buffer_t (&in_buf)[BATCHSIZE][DATA_SIZE_IN]) {
    for (int i = 0; i < BATCHSIZE; i++) {
        #pragma HLS PIPELINE
        for(int j = 0; j < DATA_SIZE_IN; j++) {
            #pragma HLS UNROLL
            // Input layout is sample-major: sample i occupies
            // in[i*DATA_SIZE_IN .. i*DATA_SIZE_IN + DATA_SIZE_IN - 1].
            in_buf[i][j] = in[i * DATA_SIZE_IN + j];
        }
    }
}
13+
// Invoke the generated network once per sample in the batch.
static void run_inference(in_buffer_t (&in_buf)[BATCHSIZE][DATA_SIZE_IN], out_buffer_t (&out_buf)[BATCHSIZE][DATA_SIZE_OUT]) {
    for (int i = 0; i < BATCHSIZE; i++) {
        // NOTE(review): DATAFLOW is placed inside the loop body, presumably
        // to let successive myproject() invocations overlap — confirm this
        // matches the intended task-level pipelining.
        #pragma HLS DATAFLOW
        myproject(in_buf[i],out_buf[i]);
    }
}
19+
// Copy BATCHSIZE inference results from the on-chip buffer back to the
// output pointer, mirroring the layout used by read_input.
static void write_result(out_buffer_t *out, out_buffer_t (&out_buf)[BATCHSIZE][DATA_SIZE_OUT]) {
    for (int i = 0; i < BATCHSIZE; i++) {
        #pragma HLS PIPELINE
        for (int j = 0; j < DATA_SIZE_OUT; j++) {
            #pragma HLS UNROLL
            // Output layout is sample-major, DATA_SIZE_OUT elements per sample.
            out[i * DATA_SIZE_OUT + j] = out_buf[i][j];
        }
    }
}
28+
29+
extern "C" {
/**
  \brief HLS4ML Kernel Implementation (io_parallel variant)

  Stages a full batch through read -> inference -> write as a DATAFLOW
  pipeline over on-chip buffers.

  \param in Input Vector (BATCHSIZE * DATA_SIZE_IN flattened elements)
  \param out Output Vector (BATCHSIZE * DATA_SIZE_OUT flattened elements)
*/
void kernel_wrapper(const in_buffer_t *in, out_buffer_t *out) {
    in_buffer_t in_buf[BATCHSIZE][DATA_SIZE_IN];
    out_buffer_t out_buf[BATCHSIZE][DATA_SIZE_OUT];
    // Reshape the per-sample dimension so all elements of one sample can be
    // accessed in the same cycle by the unrolled inner loops.
    #pragma HLS ARRAY_RESHAPE variable=in_buf complete dim=2
    #pragma HLS ARRAY_RESHAPE variable=out_buf complete dim=2

    // Run the three stages as concurrent dataflow tasks.
    #pragma HLS DATAFLOW
    read_input(in, in_buf);
    run_inference(in_buf, out_buf);
    write_result(out, out_buf);
}
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#include "kernel_wrapper.h"
2+
#include "firmware/myproject.h"
3+
4+
// Pack NNET_ARRAY_DEPTH consecutive input values of sample n into one
// input_t word and push DATA_SIZE_IN such words onto the stream.
static void read_input(const in_buffer_t *in, hls::stream<input_t> &input, int n) {
    for (int i = 0; i < DATA_SIZE_IN; i++) {
        #pragma HLS PIPELINE
        input_t tmp;
        for (int j = 0; j < NNET_ARRAY_DEPTH; j++) {
            #pragma HLS UNROLL
            // Flattened layout: sample n starts at
            // n * DATA_SIZE_IN * NNET_ARRAY_DEPTH.
            tmp[j] = in[(n * DATA_SIZE_IN * NNET_ARRAY_DEPTH) + (i * NNET_ARRAY_DEPTH) + j];
        }
        input << tmp;
    }
}
15+
16+
// Pop one result_t word off the output stream and scatter its DATA_SIZE_OUT
// elements into sample n's slot of the output buffer.
static void write_result(out_buffer_t *out, hls::stream<result_t> &output, int n) {
    result_t tmp = output.read();
    for (int i = 0; i < DATA_SIZE_OUT; i++) {
        #pragma HLS UNROLL
        out[(n * DATA_SIZE_OUT) + i] = tmp[i];
    }
}
23+
24+
extern "C" {
/**
  \brief HLS4ML Kernel Implementation (io_stream variant)

  Processes the batch one sample at a time: each iteration streams sample n
  into the network and streams its result back out.

  \param in Input Vector (BATCHSIZE * DATA_SIZE_IN * NNET_ARRAY_DEPTH
            flattened elements)
  \param out Output Vector (BATCHSIZE * DATA_SIZE_OUT flattened elements)
*/
void kernel_wrapper(const in_buffer_t *in, out_buffer_t *out) {
    hls::stream<input_t> input("input");
    hls::stream<result_t> output("output");
    // Input stream is sized to hold one full sample; output holds one word.
    #pragma HLS STREAM variable=input depth=DATA_SIZE_IN
    #pragma HLS STREAM variable=output depth=1

    for (int n = 0; n < BATCHSIZE; n++) {
        // Per-iteration dataflow: reader, network and writer run as
        // concurrent tasks connected by the streams above.
        #pragma HLS DATAFLOW
        read_input(in, input, n);
        myproject(input, output);
        write_result(out, output, n);
    }
}
}

0 commit comments

Comments
 (0)