Skip to content

Commit 9779fcd

Browse files
Implement the hw_quant (hardware quantization) option
1 parent 67eb4df commit 9779fcd

File tree

6 files changed

+53
-27
lines changed

6 files changed

+53
-27
lines changed

hls4ml/backends/vitis_accelerator/vitis_accelerator_backend.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def create_initial_config(
2222
num_kernel=1,
2323
num_thread=1,
2424
batchsize=8192,
25+
hw_quant=False,
2526
vivado_directives=[]
2627
):
2728
'''
@@ -45,6 +46,7 @@ def create_initial_config(
4546
config['AcceleratorConfig']['Num_Kernel'] = num_kernel
4647
config['AcceleratorConfig']['Num_Thread'] = num_thread
4748
config['AcceleratorConfig']['Batchsize'] = batchsize
49+
config['AcceleratorConfig']['HW_Quant'] = hw_quant
4850
config['AcceleratorConfig']['Vivado_Directives'] = vivado_directives
4951
return config
5052

hls4ml/backends/vitis_accelerator/vitis_accelerator_config.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@ def __init__(self, config):
2828
)
2929
self.config['Part'] = self.part
3030

31-
self.num_kernel = accel_config.get('Num_Kernel')
32-
self.num_thread = accel_config.get('Num_Thread')
33-
self.batchsize = accel_config.get('Batchsize')
31+
self.num_kernel = accel_config.get('Num_Kernel', 1)
32+
self.num_thread = accel_config.get('Num_Thread', 1)
33+
self.batchsize = accel_config.get('Batchsize', 8192)
34+
self.hw_quant = accel_config.get('HW_Quant', False)
3435

35-
self.vivado_directives = accel_config.get('Vivado_Directives')
36+
self.vivado_directives = accel_config.get('Vivado_Directives', [])
3637

3738
def get_board_type(self):
3839
return self.board_type
@@ -55,5 +56,8 @@ def get_memory_type(self):
5556
def get_memory_channel_count(self):
5657
return self.memory_channel_count
5758

59+
def get_hw_quant(self):
60+
return self.hw_quant
61+
5862
def get_vivado_directives(self):
5963
return self.vivado_directives

hls4ml/templates/vitis_accelerator/kernel_wrapper_io_parallel.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#include "kernel_wrapper.h"
22
#include "firmware/myproject.h"
33

4-
static void read_input(const in_buffer_t *in, in_buffer_t (&in_buf)[BATCHSIZE][DATA_SIZE_IN]) {
4+
static void read_input(const /*IN_INTERFACE_TYPE*/ *in, in_buffer_t (&in_buf)[BATCHSIZE][DATA_SIZE_IN]) {
55
for (int i = 0; i < BATCHSIZE; i++) {
66
#pragma HLS PIPELINE
77
for(int j = 0; j < DATA_SIZE_IN; j++) {
88
#pragma HLS UNROLL
9-
in_buf[i][j] = in[i * DATA_SIZE_IN + j];
9+
in_buf[i][j] = /*IN_HW_QUANT*/in[i * DATA_SIZE_IN + j];
1010
}
1111
}
1212
}
@@ -16,12 +16,12 @@ static void run_inference(in_buffer_t (&in_buf)[BATCHSIZE][DATA_SIZE_IN], out_bu
1616
myproject(in_buf[i],out_buf[i]);
1717
}
1818
}
19-
static void write_result(out_buffer_t *out, out_buffer_t (&out_buf)[BATCHSIZE][DATA_SIZE_OUT]) {
19+
static void write_result(/*OUT_INTERFACE_TYPE*/ *out, out_buffer_t (&out_buf)[BATCHSIZE][DATA_SIZE_OUT]) {
2020
for (int i = 0; i < BATCHSIZE; i++) {
2121
#pragma HLS PIPELINE
2222
for (int j = 0; j < DATA_SIZE_OUT; j++) {
2323
#pragma HLS UNROLL
24-
out[i * DATA_SIZE_OUT + j] = out_buf[i][j];
24+
out[i * DATA_SIZE_OUT + j] = /*OUT_HW_QUANT*/out_buf[i][j];
2525
}
2626
}
2727
}
@@ -32,7 +32,7 @@ extern "C" {
3232
\param in Input Vector
3333
\param out Output Vector
3434
*/
35-
void kernel_wrapper(const in_buffer_t *in, out_buffer_t *out) {
35+
void kernel_wrapper(const /*IN_INTERFACE_TYPE*/ *in, /*OUT_INTERFACE_TYPE*/ *out) {
3636
in_buffer_t in_buf[BATCHSIZE][DATA_SIZE_IN];
3737
out_buffer_t out_buf[BATCHSIZE][DATA_SIZE_OUT];
3838
#pragma HLS ARRAY_RESHAPE variable=in_buf complete dim=2

hls4ml/templates/vitis_accelerator/kernel_wrapper_io_stream.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
#include "kernel_wrapper.h"
22
#include "firmware/myproject.h"
33

4-
static void read_input(const in_buffer_t *in, hls::stream<input_t> &input, int n) {
4+
static void read_input(const /*IN_INTERFACE_TYPE*/ *in, hls::stream<input_t> &input, int n) {
55
for (int i = 0; i < DATA_SIZE_IN; i++) {
66
#pragma HLS PIPELINE
77
input_t tmp;
88
for (int j = 0; j < NNET_ARRAY_DEPTH; j++) {
99
#pragma HLS UNROLL
10-
tmp[j] = in[(n * DATA_SIZE_IN * NNET_ARRAY_DEPTH) + (i * NNET_ARRAY_DEPTH) + j];
10+
tmp[j] = /*IN_HW_QUANT*/in[(n * DATA_SIZE_IN * NNET_ARRAY_DEPTH) + (i * NNET_ARRAY_DEPTH) + j];
1111
}
1212
input << tmp;
1313
}
1414
}
1515

16-
static void write_result(out_buffer_t *out, hls::stream<result_t> &output, int n) {
16+
static void write_result(/*OUT_INTERFACE_TYPE*/ *out, hls::stream<result_t> &output, int n) {
1717
result_t tmp = output.read();
1818
for (int i = 0; i < DATA_SIZE_OUT; i++) {
1919
#pragma HLS UNROLL
20-
out[(n * DATA_SIZE_OUT) + i] = tmp[i];
20+
out[(n * DATA_SIZE_OUT) + i] = /*OUT_HW_QUANT*/tmp[i];
2121
}
2222
}
2323

@@ -27,7 +27,7 @@ extern "C" {
2727
\param in Input Vector
2828
\param out Output Vector
2929
*/
30-
void kernel_wrapper(const in_buffer_t *in, out_buffer_t *out) {
30+
void kernel_wrapper(const /*IN_INTERFACE_TYPE*/ *in, /*OUT_INTERFACE_TYPE*/ *out) {
3131
hls::stream<input_t> input("input");
3232
hls::stream<result_t> output("output");
3333
#pragma HLS STREAM variable=input depth=DATA_SIZE_IN

hls4ml/templates/vitis_accelerator/myproject_host_cl.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#define EXPAND_STRING(var) STRINGIFY(var)
1717

1818

19-
void runFPGAHelper(FpgaObj<in_buffer_t, out_buffer_t> &fpga) {
19+
void runFPGAHelper(FpgaObj</*IN_INTERFACE_TYPE*/, /*OUT_INTERFACE_TYPE*/> &fpga) {
2020
fpga.runFPGA();
2121
}
2222

@@ -27,7 +27,7 @@ int main(int argc, char **argv) {
2727
}
2828
std::string xclbinFilename = argv[1];
2929

30-
/*FPGATYPE*/<in_buffer_t, out_buffer_t> fpga(BATCHSIZE * INSTREAMSIZE, BATCHSIZE * OUTSTREAMSIZE, NUM_CU, NUM_THREAD, 10);
30+
// hls-fpga-machine-learning FPGA type
3131

3232
std::vector<cl::Device> devices = xcl::get_xil_devices(); // Utility API that finds xilinx platforms and return a list of devices connected to Xilinx platforms
3333
auto fileBuf = xcl::read_binary_file(xclbinFilename); // Load xclbin
@@ -41,7 +41,7 @@ int main(int argc, char **argv) {
4141
if (!fin.is_open()) {
4242
std::cerr << "Error: Could not open tb_input_features.dat" << std::endl;
4343
}
44-
std::vector<in_buffer_t> inputData;
44+
std::vector</*IN_INTERFACE_TYPE*/> inputData;
4545
int num_inputs = 0;
4646
if (fin.is_open()) {
4747
std::string iline;
@@ -52,17 +52,16 @@ int main(int argc, char **argv) {
5252
std::stringstream in(iline);
5353
std::string token;
5454
while (in >> token) {
55-
in_buffer_t tmp = stof(token);
56-
inputData.push_back(tmp);
55+
inputData.push_back(/*IN_TYPE_CAST*/stof(token));
5756
}
5857
num_inputs++;
5958
}
6059
}
6160
fin.close();
6261

63-
// Copying in testbench data
62+
// Copying input data into memory-mapped arrays
6463
int num_samples = std::min(num_inputs, BATCHSIZE * NUM_CU * NUM_THREAD);
65-
memcpy(fpga.source_in.data(), inputData.data(), num_samples * INSTREAMSIZE * sizeof(in_buffer_t));
64+
memcpy(fpga.source_in.data(), inputData.data(), num_samples * INSTREAMSIZE * sizeof(/*IN_INTERFACE_TYPE*/));
6665

6766
std::vector<std::thread> hostAccelerationThreads;
6867
hostAccelerationThreads.reserve(NUM_THREAD);
@@ -93,7 +92,7 @@ int main(int argc, char **argv) {
9392
for (int i = 0; i < num_samples; i++) {
9493
std::stringstream oline;
9594
for (int n = 0; n < DATA_SIZE_OUT; n++) {
96-
oline << (float)fpga.source_hw_results[(i * DATA_SIZE_OUT) + n] << " ";
95+
oline << /*OUT_TYPE_CAST*/fpga.source_hw_results[(i * DATA_SIZE_OUT) + n] << " ";
9796
}
9897
resultsFile << oline.str() << "\n";
9998
}

hls4ml/writer/vitis_accelerator_writer.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,22 @@ def write_kernel(self, model):
127127
# Writing source file
128128
f_source = open(os.path.join(filedir, '../templates/vitis_accelerator/kernel_wrapper_' + io_type +'.cpp'))
129129
fout_source = open(f'{model.config.get_output_dir()}/kernel_wrapper.cpp', 'w')
130+
isHwQuant = self.vitis_accelerator_config.get_hw_quant()
130131
for line in f_source.readlines():
131132
if 'myproject' in line:
132133
newline = line.replace('myproject', format(model.config.get_project_name()))
134+
elif '/*IN_HW_QUANT*/' in line:
135+
newline = line.replace('/*IN_HW_QUANT*/', '(in_buffer_t)' if isHwQuant else '')
136+
elif '/*OUT_HW_QUANT*/' in line:
137+
newline = line.replace('/*OUT_HW_QUANT*/', '(float)' if isHwQuant else '')
133138
else:
134139
newline = line
140+
141+
if '/*IN_INTERFACE_TYPE*/' in newline:
142+
newline = newline.replace('/*IN_INTERFACE_TYPE*/', ('float' if isHwQuant else 'in_buffer_t'))
143+
if '/*OUT_INTERFACE_TYPE*/' in newline:
144+
newline = newline.replace('/*OUT_INTERFACE_TYPE*/', ('float' if isHwQuant else 'out_buffer_t'))
145+
135146
fout_source.write(newline)
136147
f_source.close()
137148
fout_source.close()
@@ -149,14 +160,24 @@ def write_host(self, model):
149160
filedir = os.path.dirname(os.path.abspath(__file__))
150161
f = open(os.path.join(filedir, '../templates/vitis_accelerator/myproject_host_cl.cpp'))
151162
fout = open(f'{model.config.get_output_dir()}/{model.config.get_project_name()}_host_cl.cpp', 'w')
163+
memoryType = self.vitis_accelerator_config.get_memory_type()
164+
isHwQuant = self.vitis_accelerator_config.get_hw_quant()
152165
for line in f.readlines():
153-
if '/*FPGATYPE*/' in line:
154-
if self.vitis_accelerator_config.get_memory_type() == 'hbm':
155-
newline = line.replace('/*FPGATYPE*/', 'HbmFpga')
156-
elif self.vitis_accelerator_config.get_memory_type() == 'ddr':
157-
newline = line.replace('/*FPGATYPE*/', 'DdrFpga')
166+
if '// hls-fpga-machine-learning FPGA type' in line:
167+
fpgaType = 'HbmFpga' if memoryType == 'hbm' else ('DdrFpga' if memoryType == 'ddr' else 'DdrFpga')
168+
dataType = '<float, float>' if isHwQuant else '<in_buffer_t, out_buffer_t>'
169+
newline = fpgaType + dataType + ' fpga(BATCHSIZE * INSTREAMSIZE, BATCHSIZE * OUTSTREAMSIZE, NUM_CU, NUM_THREAD, 10);'
170+
elif '/*IN_TYPE_CAST*/' in line:
171+
newline = line.replace('/*IN_TYPE_CAST*/', '' if isHwQuant else '(in_buffer_t)')
172+
elif '/*OUT_TYPE_CAST*/' in line:
173+
newline = line.replace('/*OUT_TYPE_CAST*/', '' if isHwQuant else '(float)')
158174
else:
159175
newline = line
176+
177+
if '/*IN_INTERFACE_TYPE*/' in line:
178+
newline = newline.replace('/*IN_INTERFACE_TYPE*/', 'float' if isHwQuant else 'in_buffer_t')
179+
if '/*OUT_INTERFACE_TYPE*/' in line:
180+
newline = newline.replace('/*OUT_INTERFACE_TYPE*/', 'float' if isHwQuant else 'out_buffer_t')
160181
fout.write(newline)
161182
f.close()
162183
fout.close()

0 commit comments

Comments
 (0)