Skip to content

Commit ba23a1c

Browse files
Merge branch 'merging_branch' into vitis_accelerator_dev
2 parents c7e1be2 + 574df99 commit ba23a1c

File tree

6 files changed

+166419
-92
lines changed

6 files changed

+166419
-92
lines changed

hls4ml/backends/vitis_accelerator/vitis_accelerator_backend.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import sys
33
import subprocess
4+
import numpy as np
45

56
from hls4ml.backends import VitisBackend, VivadoBackend
67
from hls4ml.model.flow import get_flow, register_flow
@@ -69,8 +70,32 @@ def build(self, model, target="all"):
6970
else:
7071
raise Exception("Currently untested on non-Linux OS")
7172

72-
def predict(self, model, x):
73-
raise Exception("TODO: Needs to be implemented")
73+
def _numpy_to_dat(self, model, x):
74+
if len(model.get_input_variables()) != 1:
75+
raise Exception("Currently unsupported for multi-input/output projects")
76+
77+
# Verify numpy array of correct shape
78+
expected_shape = model.get_input_variables()[0].size()
79+
if expected_shape != x.shape[-1]:
80+
raise Exception(f'Input shape mismatch, got {x.shape}, expected (_, {expected_shape})')
81+
82+
# Write to tb_data/tb_input_features.dat
83+
input_dat = open(f'{model.config.get_output_dir()}/tb_data/tb_input_features.dat', 'w')
84+
for input in x:
85+
newline = " ".join(str(n) for n in input)
86+
input_dat.write(newline + '\n')
87+
input_dat.close()
88+
89+
def _dat_to_numpy(self, model):
90+
expected_shape = model.get_output_variables()[0].size()
91+
output_file = f'{model.config.get_output_dir()}/tb_data/hw_results.dat'
92+
y = np.loadtxt(output_file, dtype=float).reshape(-1, expected_shape)
93+
return y
94+
95+
def hardware_predict(self, model, x):
96+
self._numpy_to_dat(model, x)
97+
os.system("./host build/kernel_wrapper.xclbin")
98+
return self._dat_to_numpy(model)
7499

75100
def _register_flows(self):
76101
validation_passes = [

hls4ml/model/graph.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -734,9 +734,6 @@ def _compute_n_samples(self, x):
734734
return int(n_sample)
735735

736736
def predict(self, x):
737-
if self.config.config.get('Backend', 'Vivado') == 'VitisAccelerator':
738-
return self.config.backend.predict(self, x)
739-
740737
top_function, ctype = self._get_top_function(x)
741738
n_samples = self._compute_n_samples(x)
742739
n_inputs = len(self.get_input_variables())
@@ -862,6 +859,14 @@ class TraceData(ctypes.Structure):
862859
else:
863860
return output, trace_output
864861

862+
def hardware_predict(self, x):
863+
"""Currently only supported for VitisAccelerator backend"""
864+
backend = self.config.config.get('Backend', 'Vivado')
865+
if backend != 'VitisAccelerator':
866+
raise Exception(f"Function unsupported for {backend} backend")
867+
868+
return self.config.backend.hardware_predict(self, x)
869+
865870
def build(self, **kwargs):
866871
"""Builds the generated project using HLS compiler.
867872

hls4ml/templates/vitis_accelerator/myproject_host_cl.cpp

Lines changed: 17 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -28,76 +28,52 @@ int main(int argc, char **argv) {
2828
std::cout << "Usage: " << argv[0] << " <XCLBIN Filename>" << std::endl;
2929
return EXIT_FAILURE;
3030
}
31-
3231
std::string xclbinFilename = argv[1];
3332

3433
/*FPGATYPE*/<in_buffer_t, out_buffer_t> fpga(INSTREAMSIZE, OUTSTREAMSIZE, NUM_CU, NUM_THREAD, 100);
3534

3635
std::vector<cl::Device> devices = xcl::get_xil_devices(); // Utility API that finds xilinx platforms and return a list of devices connected to Xilinx platforms
37-
3836
cl::Program::Binaries bins = xcl::import_binary_file(xclbinFilename); // Load xclbin
39-
4037
fpga.initializeOpenCL(devices, bins);
4138

4239
fpga.allocateHostMemory(NUM_CHANNEL);
4340

44-
std::cout << "Loading input data from tb_data/tb_input_features.dat"
45-
<< "and output predictions from tb_data/tb_output_features.dat" << std::endl;
46-
47-
std::cout << "Writing output predictions to tb_data/tb_output_predictions.dat" << std::endl;
48-
49-
std::ifstream fpr("tb_data/tb_output_predictions.dat");
41+
std::cout << "Loading input data from tb_data/tb_input_features.dat" << std::endl;
5042
std::ifstream fin("tb_data/tb_input_features.dat");
51-
5243
if (!fin.is_open()) {
5344
std::cerr << "Error: Could not open tb_input_features.dat" << std::endl;
5445
}
55-
56-
if (!fpr.is_open()) {
57-
std::cerr << "Error: Could not open tb_output_predictions.dat" << std::endl;
58-
}
59-
6046
std::vector<in_buffer_t> inputData;
61-
std::vector<out_buffer_t> outputPredictions;
62-
if (fin.is_open() && fpr.is_open()) {
63-
int e = 0;
47+
int num_inputs = 0;
48+
if (fin.is_open()) {
6449
std::string iline;
65-
std::string pline;
66-
while (std::getline(fin, iline) && std::getline(fpr, pline)) {
67-
if (e % 10 == 0) {
68-
std::cout << "Processing input/prediction " << e << std::endl;
50+
while (std::getline(fin, iline)) {
51+
if (num_inputs % 10 == 0) {
52+
std::cout << "Processing input " << num_inputs << std::endl;
6953
}
7054
std::stringstream in(iline);
71-
std::stringstream pred(pline);
7255
std::string token;
7356
while (in >> token) {
7457
in_buffer_t tmp = stof(token);
7558
inputData.push_back(tmp);
7659
}
77-
while (pred >> token) {
78-
out_buffer_t tmp = stof(token);
79-
outputPredictions.push_back(tmp);
80-
}
60+
num_inputs++;
8161
}
82-
e++;
8362
}
8463

8564
// Copying in testbench data
86-
int n = std::min((int) inputData.size(), INSTREAMSIZE * NUM_CU * NUM_THREAD);
87-
for (int i = 0; i < n; i++) {
88-
fpga.source_in[i] = inputData[i];
89-
}
65+
int num_samples = std::min(num_inputs, BATCHSIZE * NUM_CU * NUM_THREAD);
66+
memcpy(fpga.source_in.data(), inputData.data(), num_samples * DATA_SIZE_IN * sizeof(in_buffer_t));
9067

9168
// Padding rest of buffer with arbitrary values
92-
for (int i = n; i < INSTREAMSIZE * NUM_CU * NUM_THREAD; i++) {
93-
fpga.source_in[i] = (in_buffer_t)(1234.567);
69+
for (int i = num_samples * DATA_SIZE_IN; i < INSTREAMSIZE * NUM_CU * NUM_THREAD; i++) {
70+
fpga.source_in[i] = (in_buffer_t)(2.345678);
9471
}
9572

9673
std::vector<std::thread> hostAccelerationThreads;
9774
hostAccelerationThreads.reserve(NUM_THREAD);
9875

9976
std::cout << "Beginning FPGA run" << std::endl;
100-
10177
auto ts_start = SClock::now();
10278

10379
for (int i = 0; i < NUM_THREAD; i++) {
@@ -114,21 +90,18 @@ int main(int argc, char **argv) {
11490
float throughput = (float(NUM_CU * NUM_THREAD * 100 * BATCHSIZE) /
11591
float(std::chrono::duration_cast<std::chrono::nanoseconds>(ts_end - ts_start).count())) *
11692
1000000000.;
117-
118-
std::cout << "Throughput = "
119-
<< throughput
120-
<<" predictions/second\n" << std::endl;
93+
std::cout << "Throughput = " << throughput <<" predictions/second\n" << std::endl;
12194

122-
std::cout << "Writing hw resaults to file" << std::endl;
95+
std::cout << "Writing hw results to file" << std::endl;
12396
std::ofstream resultsFile;
12497
resultsFile.open("tb_data/hw_results.dat", std::ios::trunc);
12598
if (resultsFile.is_open()) {
126-
for (int i = 0; i < NUM_THREAD * NUM_CU * BATCHSIZE; i++) {
127-
std::stringstream line;
99+
for (int i = 0; i < num_samples; i++) {
100+
std::stringstream oline;
128101
for (int n = 0; n < DATA_SIZE_OUT; n++) {
129-
line << (float)fpga.source_hw_results[(i * DATA_SIZE_OUT) + n] << " ";
102+
oline << (float)fpga.source_hw_results[(i * DATA_SIZE_OUT) + n] << " ";
130103
}
131-
resultsFile << line.str() << "\n";
104+
resultsFile << oline.str() << "\n";
132105
}
133106
resultsFile.close();
134107
} else {

hls4ml/writer/vitis_accelerator_writer.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from shutil import copy, copytree
2+
from shutil import copy, copytree, rmtree
33

44
from hls4ml.writer.vitis_writer import VitisWriter
55

@@ -82,6 +82,10 @@ def write_kernel(self, model):
8282
fout_header = open(f'{model.config.get_output_dir()}/kernel_wrapper.h', 'w')
8383
model_inputs = model.get_input_variables()
8484
model_outputs = model.get_output_variables()
85+
if len(model_inputs) != 1 or len(model_outputs) != 1:
86+
raise Exception("Accelerator currently only supports projects with a single input and a single output variable")
87+
inp = model_inputs[0]
88+
out = model_outputs[0]
8589
for line in f_header.readlines():
8690
if '// hls-fpga-machine-learning accelerator parameters' in line:
8791
newline = ''
@@ -96,27 +100,25 @@ def write_kernel(self, model):
96100
elif '// hls-fpga-machine-learning accelerator io' in line:
97101
newline = ''
98102
if io_type == 'io_parallel':
99-
for inp in model_inputs:
100-
for out in model_outputs:
101-
newline += '#define DATA_SIZE_IN ' + format(inp.size_cpp()) + '\n'
102-
newline += '#define INSTREAMSIZE (BATCHSIZE * DATA_SIZE_IN)' + '\n\n'
103-
newline += '#define DATA_SIZE_OUT ' + format(out.size_cpp()) + '\n'
104-
newline += '#define OUTSTREAMSIZE (BATCHSIZE * DATA_SIZE_OUT)' + '\n\n'
105-
newline += 'typedef ' + format(inp.type.name) + ' in_buffer_t;\n'
106-
newline += 'typedef ' + format(out.type.name) + ' out_buffer_t;\n'
103+
newline += '#define DATA_SIZE_IN ' + format(inp.size_cpp()) + '\n'
104+
newline += '#define INSTREAMSIZE (BATCHSIZE * DATA_SIZE_IN)' + '\n\n'
105+
newline += '#define DATA_SIZE_OUT ' + format(out.size_cpp()) + '\n'
106+
newline += '#define OUTSTREAMSIZE (BATCHSIZE * DATA_SIZE_OUT)' + '\n\n'
107+
newline += 'typedef ' + format(inp.type.name) + ' in_buffer_t;\n'
108+
newline += 'typedef ' + format(out.type.name) + ' out_buffer_t;\n'
107109
elif io_type == 'io_stream':
108-
for inp in model_inputs:
109-
for out in model_outputs:
110-
(dims, _) = inp.get_shape()
111-
nnet_array_depth = dims.pop()
112-
newline += '#define DATA_SIZE_IN ' + ' * '.join(dims) + '\n'
113-
newline += '#define NNET_ARRAY_DEPTH ' + format(nnet_array_depth) + '\n'
114-
newline += '#define INSTREAMSIZE (BATCHSIZE * DATA_SIZE_IN * NNET_ARRAY_DEPTH)' + '\n\n'
115-
newline += '#define DATA_SIZE_OUT ' + format(out.size_cpp()) + '\n'
116-
newline += '#define OUTSTREAMSIZE (BATCHSIZE * DATA_SIZE_OUT)' + '\n\n'
117-
precision_str = model.config.backend.convert_precision_string(model.config.model_precision.get('default'))
118-
newline += 'typedef ' + precision_str + ' in_buffer_t;\n'
119-
newline += 'typedef ' + precision_str + ' out_buffer_t;\n'
110+
dims, _ = zip(*inp.get_shape())
111+
dims = list(dims)
112+
nnet_array_depth = dims.pop()
113+
dims.append("1")
114+
newline += '#define DATA_SIZE_IN ' + ' * '.join(dims) + '\n'
115+
newline += '#define NNET_ARRAY_DEPTH ' + format(nnet_array_depth) + '\n'
116+
newline += '#define INSTREAMSIZE (BATCHSIZE * DATA_SIZE_IN * NNET_ARRAY_DEPTH)' + '\n\n'
117+
newline += '#define DATA_SIZE_OUT ' + format(out.size_cpp()) + '\n'
118+
newline += '#define OUTSTREAMSIZE (BATCHSIZE * DATA_SIZE_OUT)' + '\n\n'
119+
precision_str = str(model.config.backend.convert_precision_string(model.config.model_precision.get('default')))
120+
newline += 'typedef ' + precision_str + ' in_buffer_t;\n'
121+
newline += 'typedef ' + precision_str + ' out_buffer_t;\n'
120122
else:
121123
newline = line
122124
fout_header.write(newline)
@@ -163,6 +165,8 @@ def write_host(self, model):
163165
# Write libraries
164166
src = os.path.join(filedir, '../templates/vitis_accelerator/libs')
165167
dst = f'{model.config.get_output_dir()}/libs'
168+
if os.path.exists(dst):
169+
rmtree(dst)
166170
copytree(src, dst, copy_function=copy)
167171

168172
def write_makefile(self, model):

0 commit comments

Comments
 (0)