NVIDIA-AI-IOT
diff --git a/‎README.md
Lines changed: 11 additions & 2 deletions b/‎README.md
Lines changed: 11 additions & 2 deletions
diff --git a/‎ros/packages/caffe_ros/CMakeLists.txt
Lines changed: 9 additions & 66 deletions b/‎ros/packages/caffe_ros/CMakeLists.txt
Lines changed: 9 additions & 66 deletions
diff --git a/‎ros/packages/caffe_ros/include/caffe_ros/int8_calibrator.h
Lines changed: 40 additions & 0 deletions b/‎ros/packages/caffe_ros/include/caffe_ros/int8_calibrator.h
Lines changed: 40 additions & 0 deletions
diff --git a/‎ros/packages/caffe_ros/include/caffe_ros/internal_utils.h
Lines changed: 27 additions & 0 deletions b/‎ros/packages/caffe_ros/include/caffe_ros/internal_utils.h
Lines changed: 27 additions & 0 deletions
diff --git a/‎ros/packages/caffe_ros/include/caffe_ros/tensor_net.h
Lines changed: 21 additions & 21 deletions b/‎ros/packages/caffe_ros/include/caffe_ros/tensor_net.h
Lines changed: 21 additions & 21 deletions
diff --git a/‎ros/packages/caffe_ros/launch/caffe_gscam.launch
Lines changed: 2 additions & 2 deletions b/‎ros/packages/caffe_ros/launch/caffe_gscam.launch
Lines changed: 2 additions & 2 deletions
diff --git a/‎ros/packages/caffe_ros/launch/caffe_gscam_h264.launch
Lines changed: 2 additions & 2 deletions b/‎ros/packages/caffe_ros/launch/caffe_gscam_h264.launch
Lines changed: 2 additions & 2 deletions
diff --git a/‎ros/packages/caffe_ros/launch/caffe_gscam_h265.launch
Lines changed: 2 additions & 2 deletions b/‎ros/packages/caffe_ros/launch/caffe_gscam_h265.launch
Lines changed: 2 additions & 2 deletions
diff --git a/‎ros/packages/caffe_ros/launch/everything.launch
Lines changed: 4 additions & 4 deletions b/‎ros/packages/caffe_ros/launch/everything.launch
Lines changed: 4 additions & 4 deletions
@@ -6,6 +6,8 @@ This project contains deep neural networks, computer vision and control code, ha
 
 The project's deep neural networks (DNNs) can be trained from scratch using publicly available data. A few [pre-trained DNNs](../blob/master/models/pretrained/) are also available as a part of this project. In case you want to train TrailNet DNN from scratch, follow the steps on [this page](./Models).
 
+The project also contains [Stereo DNN](../blob/master/stereoDNN/) models and runtime which allow to estimate depth from stereo camera on NVIDIA platforms.
+
 ## References and Demos
 * [arXiv paper](https://arxiv.org/abs/1705.02550)
 * GTC 2017 talk: [slides](http://on-demand.gputechconf.com/gtc/2017/presentation/s7172-nikolai-smolyanskiy-autonomous-drone-navigation-with-deep-learning.pdf), [video](http://on-demand.gputechconf.com/gtc/2017/video/s7172-smolyanskiy-autonomous-drone-navigation-with-deep-learning%20(1).PNG.mp4)
@@ -14,8 +16,15 @@ The project's deep neural networks (DNNs) can be trained from scratch using publ
 * [Demo video showing generalization to ground vehicle control and other environments](https://www.youtube.com/watch?v=ZKF5N8xUxfw)
 
 # News
-* **2018-02-15**: added support for the TBS discovery platform.
-  * Step by step instructions on how to assemble the [TBS discovery drone](../../wiki/Skypad-TBS-Discovery-Setup).
+**GTC 2018**: in case you will be at [GTC 2018](https://www.nvidia.com/en-us/gtc/) next week, you may be interested in attending our [Stereo DNN session](https://2018gputechconf.smarteventscloud.com/connect/sessionDetail.ww?SESSION_ID=152050). We'll be happy to chat about redtail in general and some of the interesting work that we've been doing.
+
+* **2018-03-22**: redtail 2.0.
+  * Added Stereo DNN models and inference library (TensorFlow/TensorRT). For more details, see the [README](../blob/master/stereoDNN/).
+  * Migrated to JetPack 3.2. This change brings latest components such as CUDA 9.0, cuDNN 7.0, TensorRT 3.0, OpenCV 3.3 and others to Jetson platform. Note that this is a breaking change.
+  * Added support for INT8 inference. This enables fast inference on devices that have hardware implementation of INT8 instructions. More details are on [our wiki](../../wiki/ROS-Nodes#int8-inference).
+
+* **2018-02-15**: added support for the TBS Discovery platform.
+  * Step by step instructions on how to assemble the [TBS Discovery drone](../../wiki/Skypad-TBS-Discovery-Setup).
   * Instructions on how to attach and use a [ZED stereo camera](https://www.stereolabs.com/zed/).
   * Detailed instructions on how to calibrate, test and fly the drone.
 
 
@@ -12,64 +12,13 @@ find_package(catkin REQUIRED COMPONENTS
 ## System dependencies are found with CMake's conventions
 #find_package(Boost REQUIRED COMPONENTS system)
 find_package(CUDA)
-# OpenCV 2.4.13 is default on Jetson. Might be required to install in simulation environment.
-find_package(OpenCV 2.4.13 REQUIRED)
-
-## Uncomment this if the package has a setup.py. This macro ensures
-## modules and global scripts declared therein get installed
-## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
-# catkin_python_setup()
-
-################################################
-## Declare ROS messages, services and actions ##
-################################################
-
-## To declare and build messages, services or actions from within this
-## package, follow these steps:
-## * Let MSG_DEP_SET be the set of packages whose message types you use in
-##   your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
-## * In the file package.xml:
-##   * add a build_depend tag for "message_generation"
-##   * add a build_depend and a run_depend tag for each package in MSG_DEP_SET
-##   * If MSG_DEP_SET isn't empty the following dependency has been pulled in
-##     but can be declared for certainty nonetheless:
-##     * add a run_depend tag for "message_runtime"
-## * In this file (CMakeLists.txt):
-##   * add "message_generation" and every package in MSG_DEP_SET to
-##     find_package(catkin REQUIRED COMPONENTS ...)
-##   * add "message_runtime" and every package in MSG_DEP_SET to
-##     catkin_package(CATKIN_DEPENDS ...)
-##   * uncomment the add_*_files sections below as needed
-##     and list every .msg/.srv/.action file to be processed
-##   * uncomment the generate_messages entry below
-##   * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
-
-## Generate messages in the 'msg' folder
-# add_message_files(
-#   FILES
-#   Message1.msg
-#   Message2.msg
-# )
-
-## Generate services in the 'srv' folder
-# add_service_files(
-#   FILES
-#   Service1.srv
-#   Service2.srv
-# )
-
-## Generate actions in the 'action' folder
-# add_action_files(
-#   FILES
-#   Action1.action
-#   Action2.action
-# )
-
-## Generate added messages and services with any dependencies listed here
-# generate_messages(
-#   DEPENDENCIES
-#   std_msgs
-# )
+# OpenCV 3.3.1 is default on Jetson starting JetPack 3.2. Might be required to install in simulation environment.
+# PATHS is required as ROS Kinetic installs its own version of OpenCV 3.3.1 without CUDA support.
+find_package(OpenCV 3.3.1 REQUIRED
+  CONFIG
+  PATHS /usr/local /usr
+  NO_DEFAULT_PATH
+)
 
 ###################################
 ## catkin specific configuration ##
@@ -95,8 +44,8 @@ set (CMAKE_CXX_STANDARD 14)
 set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
 
 set(
-	CUDA_NVCC_FLAGS
-	${CUDA_NVCC_FLAGS}; 
+  CUDA_NVCC_FLAGS
+  ${CUDA_NVCC_FLAGS}; 
     -O3 -gencode arch=compute_53,code=sm_53
 )
 
@@ -108,11 +57,6 @@ include_directories(
   ${catkin_INCLUDE_DIRS}
 )
 
-## Declare a C++ library
-# add_library(caffe_ros
-#   src/${PROJECT_NAME}/caffe_ros.cpp
-# )
-
 ## Add cmake target dependencies of the library
 ## as an example, code may need to be generated before libraries
 ## either from message generation or dynamic reconfigure
@@ -134,7 +78,6 @@ target_link_libraries(caffe_ros_node
   nvinfer
   opencv_core
   opencv_imgproc
-  opencv_gpu
   opencv_highgui
 )
 
 
@@ -0,0 +1,40 @@
+// Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+// Full license terms provided in LICENSE.md file.
+
+#ifndef CAFFE_ROS_INT8_CALIBRATOR_H
+#define CAFFE_ROS_INT8_CALIBRATOR_H
+
+#include <NvInfer.h>
+#include <string>
+#include "internal_utils.h"
+
+namespace caffe_ros
+{
+class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator
+{
+public:
+    Int8EntropyCalibrator(ConstStr& src, ConstStr& calib_cache);
+    ~Int8EntropyCalibrator();
+
+    int  getBatchSize() const override { return 1; }
+    bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
+
+    const void* readCalibrationCache(size_t& length) override;
+    void writeCalibrationCache(const void* cache, size_t length) override;
+
+    void setInputDims(nvinfer1::DimsCHW dims);
+
+private:
+    std::string src_;
+    std::string calib_cache_;
+    
+    std::deque<std::string> files_;
+
+    nvinfer1::DimsCHW dims_ = nvinfer1::DimsCHW(0, 0, 0);
+
+    float* img_d_ = nullptr;
+};
+
+}
+
+#endif
@@ -0,0 +1,27 @@
+// Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+// Full license terms provided in LICENSE.md file.
+
+#ifndef CAFFE_ROS_INTERNAL_UTILS_H
+#define CAFFE_ROS_INTERNAL_UTILS_H
+
+#include <opencv2/opencv.hpp>
+
+namespace caffe_ros
+{
+using ConstStr = const std::string;
+
+// Formats of the input layer. BGR is usually used by most of the frameworks that use OpenCV.
+enum class InputFormat
+{
+    BGR = 0,
+    RGB
+};
+
+// Performs image preprocessing (resizing, scaling, format conversion etc)
+// that is done before feeding the image into the networ.
+cv::Mat preprocessImage(cv::Mat img, int dst_img_w, int dst_img_h, InputFormat inp_fmt, ConstStr& encoding,
+                        float inp_scale, float inp_shift);
+
+}
+
+#endif
@@ -7,22 +7,23 @@
 #include <ros/ros.h>
 #include <NvInfer.h>
 #include <opencv2/opencv.hpp>
-#include <opencv2/gpu/gpu.hpp>
+// REVIEW alexeyk: OpenCV that comes with JetPack 3.2 is compiled without CUDA support.
+// #include <opencv2/core/cuda.hpp>
+
+#include "int8_calibrator.h"
 
 namespace caffe_ros
 {
 
 class TensorNet
 {
 public:
-    using ConstStr = const std::string;
-    
     TensorNet();
     virtual ~TensorNet();
 
-    void loadNetwork(ConstStr& prototxtPath, ConstStr& modelPath,
-                     ConstStr& inputBlob = "data", ConstStr& outputBlob = "prob",
-                     bool useFP16 = true, bool use_cached_model = true);
+    void loadNetwork(ConstStr& prototxt_path, ConstStr& model_path,
+                     ConstStr& input_blob, ConstStr& output_blob,
+                     nvinfer1::DataType data_type, bool use_cached_model);
 
     void forward(const unsigned char* input, size_t w, size_t h, size_t c, const std::string& encoding);
 
@@ -36,15 +37,15 @@ class TensorNet
 
     const float* getOutput() const { return out_h_; }
 
-    void setInputFormat(ConstStr& inputFormat)
+    void setInputFormat(ConstStr& input_format)
     {
-        if (inputFormat == "BGR")
+        if (input_format == "BGR")
             inp_fmt_ = InputFormat::BGR;
-        else if (inputFormat == "RGB")
+        else if (input_format == "RGB")
             inp_fmt_ = InputFormat::RGB;
         else
         {
-            ROS_FATAL("Input format %s is not supported. Supported formats: BGR and RGB", inputFormat.c_str());
+            ROS_FATAL("Input format %s is not supported. Supported formats: BGR and RGB", input_format.c_str());
             ros::shutdown();
         }
     }
@@ -68,16 +69,12 @@ class TensorNet
         context_->setProfiler(on ? &s_profiler : nullptr);
     }
 
-protected:
+    void createInt8Calibrator(ConstStr& int8_calib_src, ConstStr& int8_calib_cache);
 
-    // Formats of the input layer. BGR is usually used by most of the frameworks that use OpenCV.
-    enum class InputFormat
-    {
-        BGR = 0,
-        RGB
-    };
+protected:
 
-    void profileModel(ConstStr& prototxtPath, ConstStr& modelPath, bool useFP16, ConstStr& outputBlob, std::ostream& model);
+    void profileModel(ConstStr& prototxt_path, ConstStr& model_path, nvinfer1::DataType data_type,
+                      ConstStr& input_blob, ConstStr& output_blob, std::ostream& model);
 
     class Logger : public nvinfer1::ILogger
     {
@@ -109,9 +106,7 @@ class TensorNet
 
     cv::Mat in_h_;
     cv::Mat in_final_h_;
-    // cv::gpu::GpuMat m_inOrigD;
-    // cv::gpu::GpuMat m_inD;
-    cv::gpu::GpuMat in_d_;
+    float* in_d_  = nullptr;
     float* out_h_ = nullptr;
     float* out_d_ = nullptr;
 
@@ -121,8 +116,13 @@ class TensorNet
     // This is a separate flag from ROS_DEBUG to enable only specific profiling
     // of data preparation and DNN feed forward.
     bool debug_mode_ = false;
+
+    std::unique_ptr<Int8EntropyCalibrator> int8_calib_;
 };
 
+nvinfer1::DataType parseDataType(const std::string& src);
+std::string        toString(nvinfer1::DataType src);
+
 }
 
 #endif
@@ -3,7 +3,7 @@
     <arg name="model_path" />
     <arg name="input_layer"  default="data" />
     <arg name="output_layer" default="prob" />
-    <arg name="use_fp16"     default="true" />
+    <arg name="data_type"    default="fp16" />
     <arg name="mean"         default="0" />
     <arg name="debug_mode"   default="false" />
 
@@ -29,7 +29,7 @@
         <param name="model_path"    value="$(arg model_path)" />
         <param name="input_layer"   value="$(arg input_layer)" />
         <param name="output_layer"  value="$(arg output_layer)" />
-        <param name="use_fp16"      value="$(arg use_fp16)" />
+        <param name="data_type"     value="$(arg data_type)" />
         <param name="mean"          value="$(arg mean)" />
         <param name="debug_mode"    value="$(arg debug_mode)" />
     </node>
 
@@ -3,7 +3,7 @@
     <arg name="model_path" />
     <arg name="input_layer"  default="data" />
     <arg name="output_layer" default="prob" />
-    <arg name="use_fp16"     default="true" />
+    <arg name="data_type"    default="fp16" />
     <arg name="mean"         default="0" />
     <arg name="debug_mode"   default="false" />
 
@@ -26,7 +26,7 @@
         <param name="model_path"    value="$(arg model_path)" />
         <param name="input_layer"   value="$(arg input_layer)" />
         <param name="output_layer"  value="$(arg output_layer)" />
-        <param name="use_fp16"      value="$(arg use_fp16)" />
+        <param name="data_type"     value="$(arg data_type)" />
         <param name="mean"          value="$(arg mean)" />
         <param name="debug_mode"    value="$(arg debug_mode)" />
     </node>
 
@@ -3,7 +3,7 @@
     <arg name="model_path" />
     <arg name="input_layer"  default="data" />
     <arg name="output_layer" default="prob" />
-    <arg name="use_fp16"     default="true" />
+    <arg name="data_type"    default="fp16" />
     <arg name="mean"         default="0" />
     <arg name="debug_mode"   default="false" />
 
@@ -26,7 +26,7 @@
         <param name="model_path"    value="$(arg model_path)" />
         <param name="input_layer"   value="$(arg input_layer)" />
         <param name="output_layer"  value="$(arg output_layer)" />
-        <param name="use_fp16"      value="$(arg use_fp16)" />
+        <param name="data_type"     value="$(arg data_type)" />
         <param name="mean"          value="$(arg mean)" />
         <param name="debug_mode"    value="$(arg debug_mode)" />
     </node>
 
@@ -2,13 +2,13 @@
     <arg name="trail_prototxt_path" default="/home/nvidia/redtail/models/pretrained/TrailNet_SResNet-18.prototxt" />
     <arg name="trail_model_path"    default="/home/nvidia/redtail/models/pretrained/TrailNet_SResNet-18.caffemodel" />
     <arg name="trail_output_layer"  default="out" />
-    <arg name="trail_use_fp16"      default="true" />
+    <arg name="trail_data_type"     default="fp16" />
     <arg name="trail_rate_hz"       default="30" />
 
     <arg name="object_prototxt_path" default="/home/nvidia/redtail/models/pretrained/yolo-relu.prototxt" />
     <arg name="object_model_path"    default="/home/nvidia/redtail/models/pretrained/yolo-relu.caffemodel" />
     <arg name="object_output_layer"  default="fc25" />
-    <arg name="object_use_fp16"      default="true" />
+    <arg name="object_data_type"     default="fp16" />
     <arg name="object_rate_hz"       default="1" />
     <arg name="obj_det_threshold"    default="0.2" />
 
@@ -40,7 +40,7 @@
         <param name="prototxt_path" value="$(arg trail_prototxt_path)" />
         <param name="model_path"    value="$(arg trail_model_path)" />
         <param name="output_layer"  value="$(arg trail_output_layer)" />
-        <param name="use_fp16"      value="$(arg trail_use_fp16)" />
+        <param name="data_type"     value="$(arg trail_data_type)" />
         <param name="max_rate_hz"   value="$(arg trail_rate_hz)" />
     </node>
 
@@ -54,7 +54,7 @@
         <param name="post_proc"     value="YOLO" />
         <param name="obj_det_threshold" value="$(arg obj_det_threshold)" />
         <param name="iou_threshold"     value="0.2" />
-        <param name="use_fp16"      value="$(arg object_use_fp16)" />
+        <param name="data_type"     value="$(arg object_data_type)" />
         <param name="max_rate_hz"   value="$(arg object_rate_hz)" />
     </node>