Skip to content

Commit d519e3a

Browse files
initial version
1 parent bae3855 commit d519e3a

18 files changed

+1325
-596
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"alveo-u55c": {
3+
"part": "xcu55c-fsvh2892-2L-e",
4+
"platform": "xilinx_u55c_gen3x16_xdma_3_202210_1",
5+
"memory": {"type": "hbm", "channels": 32}
6+
},
7+
"alveo-u250": {
8+
"part": "xcu250-figd2104-2L-e",
9+
"platform": "xilinx_u250_xdma_201830_2",
10+
"memory": {"type": "ddr", "channels": 4}
11+
}
12+
}

hls4ml/backends/vitis_accelerator/vitis_accelerator_backend.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,37 @@ def __init__(self):
1313
self._register_layer_attributes()
1414
self._register_flows()
1515

16+
def create_initial_config(
    self,
    board='alveo-u55c',
    part=None,
    clock_period=5,
    io_type='io_parallel',
    num_kernel=1,
    num_thread=1,
    batchsize=8192
):
    '''
    Create initial accelerator config with default parameters

    Args:
        board: one of the keys defined in supported_boards.json
        part: FPGA part to target; when None the part is later derived from
            the board entry (see VitisAcceleratorConfig)
        clock_period: clock period passed to hls project
        io_type: io_parallel or io_stream
        num_kernel: how many compute units to create on the fpga
        num_thread: how many threads the host cpu uses to drive the fpga
        batchsize: number of samples the host buffers per kernel invocation
    Returns:
        populated config
    '''
    # Guard against callers explicitly passing board=None.
    board = board if board is not None else 'alveo-u55c'
    config = super().create_initial_config(part, clock_period, io_type)
    # Accelerator-specific settings live under their own section so the
    # base config keys stay untouched.
    config['AcceleratorConfig'] = {}
    config['AcceleratorConfig']['Board'] = board
    config['AcceleratorConfig']['Num_Kernel'] = num_kernel
    config['AcceleratorConfig']['Num_Thread'] = num_thread
    config['AcceleratorConfig']['Batchsize'] = batchsize
    return config
46+
1647
def _register_flows(self):
1748
validation_passes = [
1849
'vitisaccelerator:validate_conv_implementation',

hls4ml/backends/vitis_accelerator/vitis_accelerator_config.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,48 @@
99
class VitisAcceleratorConfig:
    '''Accessor for the AcceleratorConfig section of an hls4ml config.

    Resolves the target board against supported_boards.json and exposes the
    platform/memory/kernel settings the Vitis Accelerator writer needs.
    '''

    def __init__(self, config):
        '''
        Args:
            config: hls4ml config wrapper; its ``config`` dict must contain
                an ``AcceleratorConfig`` section.

        Raises:
            Exception: if AcceleratorConfig is missing, or the requested
                board is not listed in supported_boards.json.
        '''
        self.config = config.config
        accel_config = self.config.get('AcceleratorConfig', None)
        if accel_config is None:
            raise Exception('Missing AcceleratorConfig')

        self.board = accel_config.get('Board', 'alveo-u55c')
        # Use a context manager so the JSON file handle is closed promptly
        # (the previous json.load(open(...)) leaked the handle), and
        # os.path.join for portable path construction.
        boards_path = os.path.join(os.path.dirname(__file__), 'supported_boards.json')
        with open(boards_path) as boards_file:
            self.supported_boards = json.load(boards_file)
        if self.board in self.supported_boards:
            board_info = self.supported_boards[self.board]
            self.part = board_info['part']
            # Only Alveo entries carry platform/memory metadata in
            # supported_boards.json.
            if self.board.startswith('alveo'):
                self.platform = board_info['platform']
                self.memory_type = board_info['memory']['type']
                self.memory_channel_count = board_info['memory']['channels']
        else:
            raise Exception('The board does not appear in supported_boards.json file')

        # The board determines the part; override any user-supplied Part
        # that disagrees with it.
        if self.config.get('Part') is not None:
            if self.config.get('Part') != self.part:
                print(
                    'WARNING: You set a Part that does not correspond to the Board you specified. '
                    'The correct Part is now set.'
                )
                self.config['Part'] = self.part

        self.num_kernel = accel_config.get('Num_Kernel')
        self.num_thread = accel_config.get('Num_Thread')
        self.batchsize = accel_config.get('Batchsize')

    def get_platform(self):
        return self.platform

    def get_num_thread(self):
        return self.num_thread

    def get_num_kernel(self):
        return self.num_kernel

    def get_batchsize(self):
        return self.batchsize

    def get_memory_type(self):
        return self.memory_type

    def get_memory_channel_count(self):
        return self.memory_channel_count
Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,56 @@
1-
.PHONY: clean
2-
3-
PLATFORM=myplatform
1+
# Absolute path to top directory of Git repository
PWD = $(shell readlink -f .)

# Checks for XILINX_VITIS
ifndef XILINX_VITIS
$(error XILINX_VITIS variable is not set, please set correctly and rerun)
endif

# Checks for XILINX_XRT
ifndef XILINX_XRT
$(error XILINX_XRT variable is not set, please set correctly and rerun)
endif

# Checks for XILINX_VIVADO
ifndef XILINX_VIVADO
$(error XILINX_VIVADO variable is not set, please set correctly and rerun)
endif

# Checks for g++; fall back to the compiler shipped with Vivado when the
# system g++ is older than 5.
ifneq ($(shell expr $(shell g++ -dumpversion) \>= 5), 1)
CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-6.2.0/bin/g++
$(warning [WARNING]: g++ version older. Using g++ provided by the tool : $(CXX))
endif

KERN_LIBRARIES += -I./ -I./firmware/ -I./firmware/weights -I./firmware/nnet_utils/

.PHONY: all
all: host kernel

# Building kernel
./build/myproject_kernel.xo: kernel_wrapper.cpp
	mkdir -p ./build
	v++ -c -t hw --config ./u55c.cfg kernel_wrapper.cpp firmware/myproject.cpp -o $@ $(KERN_LIBRARIES)

# BUGFIX: this recipe previously wrote kernel_wrapper.xclbin while the target
# was myproject_kernel.xclbin, so the target file was never produced and the
# (hours-long) link step re-ran on every build. Using $@/$< keeps the output
# and the declared target in sync.
myproject_kernel.xclbin: ./build/myproject_kernel.xo
	v++ -l -t hw --config ./u55c.cfg $< -o $@

# Building Host
INCLUDES += -I$(XILINX_XRT)/include/ -I$(XILINX_VIVADO)/include/ -I$(XILINX_HLS)/include/ \
	-I$(PWD)/libs/ -I$(PWD)/firmware/ -I$(PWD)/firmware/nnet_utils/
CXXFLAGS += -Wall -std=c++11 -Wno-unknown-pragmas -g -O0
LDFLAGS = -L$(XILINX_XRT)/lib/ -lstdc++ -lpthread -lrt -lOpenCL

host: myproject_host_cl.cpp libs/xcl2.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(INCLUDES) $(LDFLAGS)

.PHONY: kernel
kernel: myproject_kernel.xclbin

# Cleaning stuff
.PHONY: clean
clean:
	-rm -rf host libs/*.o
	-rm -rf *.xclbin*
	-rm -rf build*
	-rm -rf *.log *.jou *.rpt *.csv *.mdb *.ltx
Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,29 @@
1-
platform=myplatform
1+
kernel=kernel_wrapper
log_dir=build/logs
messageDb=build/kernel_wrapper.mdb
platform=MYPLATFORM
report_dir=build/reports
save-temps=1
temp_dir=build

[advanced]
prop=kernel.kernel_wrapper.kernel_flags=-std=c++11

[hls]
pre_tcl=./hls_config.tcl

# hls-fpga-machine-learning kernel control

[vivado]
prop=run.impl_1.STEPS.OPT_DESIGN.IS_ENABLED=true
prop=run.impl_1.STEPS.OPT_DESIGN.ARGS.DIRECTIVE=Explore

prop=run.impl_1.STEPS.PLACE_DESIGN.ARGS.DIRECTIVE=AltSpreadLogic_high

prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.IS_ENABLED=true
# BUGFIX: run name was misspelled "imp1_1" (digit one), so this directive was
# silently ignored; every other prop in this section targets run.impl_1.
prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.ARGS.DIRECTIVE=AggressiveExplore

prop=run.impl_1.STEPS.ROUTE_DESIGN.ARGS.DIRECTIVE=Explore

prop=run.impl_1.STEPS.POST_ROUTE_PHYS_OPT_DESIGN.IS_ENABLED=true
prop=run.impl_1.STEPS.POST_ROUTE_PHYS_OPT_DESIGN.ARGS.DIRECTIVE=AggressiveExplore
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Sourced via pre_tcl from the v++ [hls] config section. Lets HLS create
# m_axi interface ports automatically up to the maximum, so the kernel
# wrapper's pointer arguments each get an AXI master without manual pragmas.
config_interface -m_axi_auto_max_ports=true
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#ifndef KERNEL_WRAPPER_H
#define KERNEL_WRAPPER_H

// Shared declarations for the HLS accelerator kernel wrapper.
#include "firmware/defines.h"

// The markers below are placeholders: the hls4ml writer replaces them with
// the generated accelerator parameters (e.g. BATCHSIZE, DATA_SIZE_IN/OUT)
// and the in_buffer_t / out_buffer_t io typedefs.

// hls-fpga-machine-learning accelerator parameters

// hls-fpga-machine-learning accelerator io

#endif
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#include "kernel_wrapper.h"
2+
#include "firmware/myproject.h"
3+
4+
// Copy BATCHSIZE flattened samples from the input pointer into the on-chip
// 2-D buffer, one sample per pipelined iteration, with the inner per-element
// copy fully unrolled.
static void read_input(const in_buffer_t *in, in_buffer_t (&in_buf)[BATCHSIZE][DATA_SIZE_IN]) {
    for (int i = 0; i < BATCHSIZE; i++) {
        #pragma HLS PIPELINE
        for(int j = 0; j < DATA_SIZE_IN; j++) {
            #pragma HLS UNROLL
            // Input layout is sample-major: sample i occupies
            // in[i*DATA_SIZE_IN .. i*DATA_SIZE_IN + DATA_SIZE_IN - 1].
            in_buf[i][j] = in[i * DATA_SIZE_IN + j];
        }
    }
}
13+
// Invoke the generated network once per sample in the batch.
static void run_inference(in_buffer_t (&in_buf)[BATCHSIZE][DATA_SIZE_IN], out_buffer_t (&out_buf)[BATCHSIZE][DATA_SIZE_OUT]) {
    for (int i = 0; i < BATCHSIZE; i++) {
        // NOTE(review): DATAFLOW is placed inside the loop body, presumably
        // to let successive myproject() invocations overlap — confirm this
        // matches the intended task-level pipelining.
        #pragma HLS DATAFLOW
        myproject(in_buf[i],out_buf[i]);
    }
}
19+
// Copy BATCHSIZE inference results from the on-chip buffer back to the
// output pointer, mirroring the layout used by read_input.
static void write_result(out_buffer_t *out, out_buffer_t (&out_buf)[BATCHSIZE][DATA_SIZE_OUT]) {
    for (int i = 0; i < BATCHSIZE; i++) {
        #pragma HLS PIPELINE
        for (int j = 0; j < DATA_SIZE_OUT; j++) {
            #pragma HLS UNROLL
            // Output layout is sample-major, DATA_SIZE_OUT elements per sample.
            out[i * DATA_SIZE_OUT + j] = out_buf[i][j];
        }
    }
}
28+
29+
extern "C" {
/**
  \brief HLS4ML Kernel Implementation (io_parallel variant)

  Stages a full batch through read -> inference -> write as a DATAFLOW
  pipeline over on-chip buffers.

  \param in Input Vector (BATCHSIZE * DATA_SIZE_IN flattened elements)
  \param out Output Vector (BATCHSIZE * DATA_SIZE_OUT flattened elements)
*/
void kernel_wrapper(const in_buffer_t *in, out_buffer_t *out) {
    in_buffer_t in_buf[BATCHSIZE][DATA_SIZE_IN];
    out_buffer_t out_buf[BATCHSIZE][DATA_SIZE_OUT];
    // Reshape the per-sample dimension so all elements of one sample can be
    // accessed in the same cycle by the unrolled inner loops.
    #pragma HLS ARRAY_RESHAPE variable=in_buf complete dim=2
    #pragma HLS ARRAY_RESHAPE variable=out_buf complete dim=2

    // Run the three stages as concurrent dataflow tasks.
    #pragma HLS DATAFLOW
    read_input(in, in_buf);
    run_inference(in_buf, out_buf);
    write_result(out, out_buf);
}
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#include "kernel_wrapper.h"
2+
#include "firmware/myproject.h"
3+
4+
// Pack NNET_ARRAY_DEPTH consecutive input values of sample n into one
// input_t word and push DATA_SIZE_IN such words onto the stream.
static void read_input(const in_buffer_t *in, hls::stream<input_t> &input, int n) {
    for (int i = 0; i < DATA_SIZE_IN; i++) {
        #pragma HLS PIPELINE
        input_t tmp;
        for (int j = 0; j < NNET_ARRAY_DEPTH; j++) {
            #pragma HLS UNROLL
            // Flattened layout: sample n starts at
            // n * DATA_SIZE_IN * NNET_ARRAY_DEPTH.
            tmp[j] = in[(n * DATA_SIZE_IN * NNET_ARRAY_DEPTH) + (i * NNET_ARRAY_DEPTH) + j];
        }
        input << tmp;
    }
}
15+
16+
// Pop one result_t word off the output stream and scatter its DATA_SIZE_OUT
// elements into sample n's slot of the output buffer.
static void write_result(out_buffer_t *out, hls::stream<result_t> &output, int n) {
    result_t tmp = output.read();
    for (int i = 0; i < DATA_SIZE_OUT; i++) {
        #pragma HLS UNROLL
        out[(n * DATA_SIZE_OUT) + i] = tmp[i];
    }
}
23+
24+
extern "C" {
/**
  \brief HLS4ML Kernel Implementation (io_stream variant)

  Processes the batch one sample at a time: each iteration streams sample n
  into the network and streams its result back out.

  \param in Input Vector (BATCHSIZE * DATA_SIZE_IN * NNET_ARRAY_DEPTH
            flattened elements)
  \param out Output Vector (BATCHSIZE * DATA_SIZE_OUT flattened elements)
*/
void kernel_wrapper(const in_buffer_t *in, out_buffer_t *out) {
    hls::stream<input_t> input("input");
    hls::stream<result_t> output("output");
    // Input stream is sized to hold one full sample; output holds one word.
    #pragma HLS STREAM variable=input depth=DATA_SIZE_IN
    #pragma HLS STREAM variable=output depth=1

    for (int n = 0; n < BATCHSIZE; n++) {
        // Per-iteration dataflow: reader, network and writer run as
        // concurrent tasks connected by the streams above.
        #pragma HLS DATAFLOW
        read_input(in, input, n);
        myproject(input, output);
        write_result(out, output, n);
    }
}
}

0 commit comments

Comments
 (0)