
Commit 2c35c7b

Merge branch 'vitis_accelerator_dev' into merge
2 parents 9a68ddb + 55ff7e2 commit 2c35c7b

File tree: 11 files changed, +332 / -58 lines changed

hls4ml/backends/__init__.py

Lines changed: 3 additions & 5 deletions
@@ -2,20 +2,18 @@
 from hls4ml.backends.fpga.fpga_backend import FPGABackend  # noqa: F401
 from hls4ml.backends.quartus.quartus_backend import QuartusBackend
 from hls4ml.backends.symbolic.symbolic_backend import SymbolicExpressionBackend
+from hls4ml.backends.vitis_accelerator.vitis_accelerator_config import VitisAcceleratorConfig  # noqa: F401
 from hls4ml.backends.vivado.vivado_backend import VivadoBackend
 from hls4ml.backends.vivado_accelerator.vivado_accelerator_backend import VivadoAcceleratorBackend
 from hls4ml.backends.vivado_accelerator.vivado_accelerator_config import VivadoAcceleratorConfig  # noqa: F401

 from hls4ml.backends.vitis.vitis_backend import VitisBackend  # isort: skip
+from hls4ml.backends.vitis_accelerator.vitis_accelerator_backend import VitisAcceleratorBackend  # isort: skip

-#[K] start
-from hls4ml.backends.vitis_accelerator.vitis_accelerator_backend import VitisAcceleratorBackend
-from hls4ml.backends.vitis_accelerator.vitis_accelerator_config import VitisAcceleratorConfig
-#[K] end

 register_backend('Vivado', VivadoBackend)
 register_backend('VivadoAccelerator', VivadoAcceleratorBackend)
 register_backend('Vitis', VitisBackend)
-register_backend('VitisAccelerator', VitisAcceleratorBackend) #[K]
+register_backend('VitisAccelerator', VitisAcceleratorBackend)
 register_backend('Quartus', QuartusBackend)
 register_backend('SymbolicExpression', SymbolicExpressionBackend)
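
Once registered under the name 'VitisAccelerator', the backend can be selected through the standard hls4ml conversion API. A minimal sketch, not part of this commit, assuming an existing trained Keras model object named `model`:

import hls4ml

# Sketch only: `model` is assumed to be a trained Keras model.
config = hls4ml.utils.config_from_keras_model(model, granularity='model')

hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    backend='VitisAccelerator',  # the name passed to register_backend above
    output_dir='my_vitis_accel_prj',
)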

hls4ml/backends/vitis_accelerator/vitis_accelerator_backend.py

Lines changed: 50 additions & 6 deletions
@@ -1,10 +1,5 @@
-import os
-import sys
-
-from hls4ml.backends import VitisBackend
-from hls4ml.backends import VivadoBackend
+from hls4ml.backends import VitisBackend, VivadoBackend
 from hls4ml.model.flow import get_flow, register_flow
-from hls4ml.report import parse_vivado_report


 class VitisAcceleratorBackend(VitisBackend):
@@ -64,3 +59,52 @@ def _register_flows(self):
         ip_flow_requirements.insert(ip_flow_requirements.index('vivado:apply_templates'), template_flow)

         self._default_flow = register_flow('ip', None, requires=ip_flow_requirements, backend=self.name)
+
+    def create_initial_config(
+        self,
+        board='pynq-z2',
+        part=None,
+        clock_period=5,
+        io_type='io_parallel',
+        interface='axi_stream',
+        driver='python',
+        input_type='float',
+        output_type='float',
+        platform='xilinx_u250_xdma_201830_2',
+    ):
+        '''
+        Create initial accelerator config with default parameters.
+
+        Args:
+            board: one of the keys defined in supported_boards.json
+            clock_period: clock period passed to the HLS project
+            io_type: io_parallel or io_stream
+            interface: `axi_stream`: generate hardware designs and drivers which exploit AXI stream channels.
+                `axi_master`: generate hardware designs and drivers which exploit AXI master channels.
+                `axi_lite`: generate hardware designs and drivers which exploit AXI lite channels. (Do not use it
+                to exchange large amounts of data.)
+            driver: `python`: generates the Python driver to use the accelerator in the PYNQ stack.
+                `c`: generates the C driver to use the accelerator bare-metal.
+            input_type: the wrapper input precision. Can be `float` or an `ap_type`. Note: VivadoAcceleratorBackend
+                will round the number of bits used to the next power-of-2 value.
+            output_type: the wrapper output precision. Can be `float` or an `ap_type`. Note:
+                VivadoAcceleratorBackend will round the number of bits used to the next power-of-2 value.
+            platform: development target platform
+
+        Returns:
+            populated config
+        '''
+        board = board if board is not None else 'pynq-z2'
+        config = super().create_initial_config(part, clock_period, io_type)
+        config['AcceleratorConfig'] = {}
+        config['AcceleratorConfig']['Board'] = board
+        config['AcceleratorConfig']['Interface'] = interface  # axi_stream, axi_master, axi_lite
+        config['AcceleratorConfig']['Driver'] = driver
+        config['AcceleratorConfig']['Precision'] = {}
+        config['AcceleratorConfig']['Precision']['Input'] = {}
+        config['AcceleratorConfig']['Precision']['Output'] = {}
+        config['AcceleratorConfig']['Precision']['Input'] = input_type  # float, double or ap_fixed<a,b>
+        config['AcceleratorConfig']['Precision']['Output'] = output_type  # float, double or ap_fixed<a,b>
+        config['AcceleratorConfig']['Platform'] = platform
+
+        return config
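
The new create_initial_config() extends the base configuration with an AcceleratorConfig section. For orientation, a small sketch of inspecting those defaults (illustrative only; it assumes the backend registry exposed by hls4ml.backends):

from hls4ml.backends import get_backend

# Sketch only: fetch the registered backend and build its default config.
backend = get_backend('VitisAccelerator')
cfg = backend.create_initial_config()
print(cfg['AcceleratorConfig'])
# Expected keys, per the method above: 'Board', 'Interface', 'Driver',
# 'Precision' (with 'Input' and 'Output'), and 'Platform'.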

hls4ml/backends/vitis_accelerator/vitis_accelerator_config.py

Lines changed: 0 additions & 8 deletions
@@ -1,11 +1,3 @@
-import json
-import os
-
-import numpy as np
-
-from hls4ml.model.layers import FixedPrecisionType, IntegerPrecisionType
-
-
 class VitisAcceleratorConfig:
     def __init__(self, config):
         self.config = config.config
Lines changed: 230 additions & 0 deletions
@@ -0,0 +1,230 @@
+/**
+ * Copyright (C) 2019-2022 Xilinx, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may
+ * not use this file except in compliance with the License. A copy of the
+ * License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <cstring>
+#include <iostream>
+
+// XRT includes
+#include "xrt/xrt_bo.h"
+#include "xrt/xrt_device.h"
+#include "xrt/xrt_kernel.h"
+
+////////////////// HLS4ML Includes start //////////////////
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vector>
+
+#include "firmware/myproject.h"
+#include "firmware/nnet_utils/nnet_helpers.h"
+
+// hls-fpga-machine-learning insert bram
+
+#define CHECKPOINT 5000
+
+namespace nnet {
+bool trace_enabled = true;
+std::map<std::string, void *> *trace_outputs = NULL;
+size_t trace_type_size = sizeof(double);
+} // namespace nnet
+
+////////////////// HLS4ML Includes end //////////////////
+
+#define DATA_SIZE 1
+
+int main(int argc, char **argv) {
+
+    // Read settings
+    std::string binaryFile = argv[1];
+    int device_index = 0;
+
+    if (argc != 2) {
+        std::cout << "Usage: " << argv[0] << " <XCLBIN File>" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    std::string target_device = "myplatform";
+    auto device_type = target_device.substr(0, target_device.size() - 17);
+    std::cout << "Device type: " << device_type << std::endl;
+    std::cout << "xrt::device size " << sizeof(xrt::device) << std::endl;
+    for (int i = 0; i < sizeof(xrt::device); i++){
+        std::cout << "device[" << i << "] name: " << xrt::device(i).get_info<xrt::info::device::name>() << "\n";
+        std::cout << "device[" << i << "] bdf: " << xrt::device(i).get_info<xrt::info::device::bdf>() << "\n\n";
+        size_t found = xrt::device(i).get_info<xrt::info::device::name>().find(device_type);
+        if (found != std::string::npos){
+            std::cout << "Device: " << xrt::device(i).get_info<xrt::info::device::name>() << " found." << std::endl;
+            device_index = i;
+            std::cout << "Device index in loop: " <<device_index << std::endl;
+            break;
+        }
+        else{
+            std::cout << "Device not found" << std::endl;
+        }
+    }
+    std::cout << "Open the device" << device_index << std::endl;
+    auto device = xrt::device(device_index);
+    std::cout << "Load the xclbin " << binaryFile << std::endl;
+    auto uuid = device.load_xclbin(binaryFile);
+
+    size_t vector_size_bytes_in = sizeof(input_t) * DATA_SIZE;
+    size_t vector_size_bytes_out = sizeof(result_t) * DATA_SIZE;
+
+    auto krnl = xrt::kernel(device, uuid, "myproject_kernel");
+
+    std::cout << "Allocate Buffer in Global Memory\n";
+    auto bo0 = xrt::bo(device, vector_size_bytes_in, krnl.group_id(0));
+    auto bo_out = xrt::bo(device, vector_size_bytes_out, krnl.group_id(1));
+
+    // Map the contents of the buffer object into host memory
+    auto bo0_map = bo0.map<input_t *>();
+    auto bo0_out_map = bo_out.map<result_t *>();
+    memset((char *)bo0_map, 0, vector_size_bytes_in);
+    memset((char *)bo0_out_map, 0, vector_size_bytes_out);
+
+    // Create the test data
+    /////////////////////////// From HLS4ML test start ///////////////////////////
+
+    // load input data from text file
+    std::ifstream fin("output_dir/tb_data/tb_input_features.dat");
+    // load predictions from text file
+    std::ifstream fpr("output_dir/tb_data/tb_output_predictions.dat");
+
+    std::string RESULTS_LOG = "output_dir/tb_data/hw_results.log";
+    std::ofstream fout(RESULTS_LOG);
+
+    std::string iline;
+    std::string pline;
+    int e = 0;
+
+    if (fin.is_open() && fpr.is_open()) {
+        while (std::getline(fin, iline) && std::getline(fpr, pline)) {
+            if (e % CHECKPOINT == 0)
+                std::cout << "Processing input " << e << std::endl;
+            char *cstr = const_cast<char *>(iline.c_str());
+            char *current;
+            std::vector<float> in;
+            current = strtok(cstr, " ");
+            while (current != NULL) {
+                in.push_back(atof(current));
+                current = strtok(NULL, " ");
+            }
+            cstr = const_cast<char *>(pline.c_str());
+            std::vector<float> pr;
+            current = strtok(cstr, " ");
+            while (current != NULL) {
+                pr.push_back(atof(current));
+                current = strtok(NULL, " ");
+            }
+            // Ensure the size of in is not greater than bo0_map size
+            size_t minSize = std::min(in.size(), static_cast<size_t>(input_t::size)); // Access size as a static member
+
+            for (size_t i = 0; i < minSize; ++i) {
+                // Perform type conversion and scale appropriately to fit within ap_fixed<16,6>
+                (*bo0_map)[i] = static_cast<ap_fixed<16, 6>>(in[i]); // Assuming in[i] fits within range of ap_fixed<16,6>
+            }
+
+            // hls-fpga-machine-learning insert top-level-function
+            //////////////////// Run on HW start ////////////////////
+            // Synchronize buffer content with device side
+            std::cout << "synchronize input buffer data to device global memory\n";
+
+            bo0.sync(XCL_BO_SYNC_BO_TO_DEVICE);
+            // bo1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
+
+            std::cout << "Execution of the kernel\n";
+            // auto run = krnl(bo0, bo1, bo_out, DATA_SIZE);
+            auto run = krnl(bo0, bo_out, DATA_SIZE);
+            run.wait();
+
+            // Get the output;
+            std::cout << "Get the output data from the device" << std::endl;
+            bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
+
+            // Print contents of bo0_map
+            std::cout << "Contents of bo0_map (Input):" << std::endl;
+            for (int i = 0; i < DATA_SIZE; ++i) {
+                for (size_t j = 0; j < myinput; j++) {
+                    std::cout << bo0_map[i][j] << " ";
+                }
+            }
+            std::cout << std::endl;
+
+            std::cout << "Contents of bo0_out_map (Output):" << std::endl;
+            for (int i = 0; i < DATA_SIZE; ++i) {
+                for (size_t j = 0; j < mylayer_out; j++) {
+                    std::cout << bo0_out_map[i][j] << " ";
+                }
+            }
+            std::cout << std::endl;
+            std::cout << "TEST END\n";
+            //////////////////// Run on HW end ////////////////////
+
+            if (e % CHECKPOINT == 0) {
+                std::cout << "Predictions" << std::endl;
+                // hls-fpga-machine-learning insert predictions
+                for (int i = 0; i < mylayer_out; i++) {
+                    std::cout << pr[i] << " ";
+                }
+                std::cout << std::endl;
+                std::cout << "Quantized predictions" << std::endl;
+            }
+            e++;
+        }
+
+        delete bo0_map; // Don't forget to release memory if dynamically allocated
+
+        fin.close();
+        fpr.close();
+    } else {
+        std::cout << "INFO: Unable to open input/predictions file, using default input." << std::endl;
+
+        //////////////////// Run on HW start ////////////////////
+        bo0_map = {0};
+
+        // Synchronize buffer content with device side
+        std::cout << "synchronize input buffer data to device global memory\n";
+
+        bo0.sync(XCL_BO_SYNC_BO_TO_DEVICE);
+
+        std::cout << "Execution of the kernel\n";
+        auto run = krnl(bo0, bo_out, DATA_SIZE);
+        run.wait();
+
+        // Get the output;
+        std::cout << "Get the output data from the device" << std::endl;
+        bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
+
+        std::cout << "Contents of bo0_out_map (Output):" << std::endl;
+        for (int i = 0; i < DATA_SIZE; ++i) {
+            for (size_t j = 0; j < mylayer_out; j++) {
+                std::cout << bo0_out_map[i][j] << " ";
+            }
+        }
+        std::cout << std::endl;
+
+        std::cout << "TEST END\n";
+        //////////////////// Run on HW end ////////////////////
+    }
+    fout.close();
+    std::cout << "INFO: Saved inference results to file: " << RESULTS_LOG << std::endl;
+    /////////////////////////// From HLS4ML test end ///////////////////////////
+    return 0;
+}
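
The host program above reads one sample per line of whitespace-separated values from the two tb_data files. A hedged sketch of producing compatible files with NumPy (file paths follow the code above; array shapes are placeholders, not taken from the commit):

import numpy as np

# Sketch only: shapes are hypothetical; match them to your model's input/output.
X = np.random.rand(8, 16).astype(np.float32)       # candidate input features
y_ref = np.random.rand(8, 5).astype(np.float32)    # reference predictions

# np.savetxt writes one row per line, space-separated by default,
# which is what the strtok/atof parsing loop above expects.
np.savetxt('output_dir/tb_data/tb_input_features.dat', X, fmt='%f')
np.savetxt('output_dir/tb_data/tb_output_predictions.dat', y_ref, fmt='%f')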

hls4ml/templates/vitis_accelerator/myproject_host_cl.cpp

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ void runFPGAHelper(FpgaObj<in_buffer_t, out_buffer_t> &fpga) {
     fpga.write_ss_safe(ss.str());
 }

-int main(int argc, char** argv) {
+int main(int argc, char **argv) {
     if (argc != 2) {
         std::cout << "Usage: " << argv[0] << " <XCLBIN Filename>" << std::endl;
         return EXIT_FAILURE;
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+#include <iostream>
+
+#include "firmware/myproject.cpp"
+#include "firmware/myproject.h"
+#include "firmware/parameters.h"
+
+constexpr int c_size = 1024;
+
+static void load_input(input_t *in, hls::stream<input_t> &inStream, int size) {
+mem_rd:
+    for (int i = 0; i < size; i++) {
+#pragma HLS LOOP_TRIPCOUNT min = c_size max = c_size
+        inStream << in[i];
+    }
+}
+// static void store_result(result_t* out, hls::stream<result_t>& out_stream, int size) {
+static void store_result(result_t *out, hls::stream<result_t> &out_stream, int size) {
+mem_wr:
+    for (int i = 0; i < size; i++) {
+#pragma HLS LOOP_TRIPCOUNT min = c_size max = c_size
+        result_t temp = out_stream.read();
+        out[i] = temp;
+    }
+}
+
+void myproject_kernel(
+    // hls-fpga-machine-learning insert header
+) {
+#pragma HLS INTERFACE m_axi port = project_input bundle = gmem0
+#pragma HLS INTERFACE m_axi port = project_output bundle = gmem1
+    static hls::stream<input_t> project_input_stream("project_input_stream");
+    static hls::stream<result_t> project_output_stream("project_output_stream");
+#pragma HLS dataflow
+    load_input(project_input, project_input_stream, size);
+    // hls-fpga-machine-learning insert project top
+    store_result(project_output, project_output_stream, size);
+}

hls4ml/templates/vitis_accelerator/nnet_utils/nnet_types.h

Lines changed: 2 additions & 2 deletions
@@ -19,8 +19,8 @@ template <typename T, unsigned N> struct array {
     const T &operator[](size_t pos) const { return data[pos]; }

     array &operator=(const array &other) {
-    // if (&other == this)
-    //     return *this;
+        // if (&other == this)
+        //     return *this;

        assert(N == other.size && "Array sizes must match.");

hls4ml/writer/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 from hls4ml.writer.quartus_writer import QuartusWriter
 from hls4ml.writer.symbolic_writer import SymbolicExpressionWriter
-from hls4ml.writer.vitis_writer import VitisWriter
 from hls4ml.writer.vitis_accelerator_writer import VitisAcceleratorWriter
+from hls4ml.writer.vitis_writer import VitisWriter
 from hls4ml.writer.vivado_accelerator_writer import VivadoAcceleratorWriter
 from hls4ml.writer.vivado_writer import VivadoWriter
 from hls4ml.writer.writers import Writer, get_writer, register_writer  # noqa: F401
