
Commit ef72847

Merge pull request #37 from NVIDIA-Jetson/alexeyk/scratch
Added support for bgr8 image encoding used in ZED camera ROS node.
2 parents: bb17c65 + fb351d5, commit ef72847

File tree

7 files changed: +137 -31 lines


ros/packages/caffe_ros/include/caffe_ros/tensor_net.h

Lines changed: 5 additions & 4 deletions

@@ -24,7 +24,7 @@ class TensorNet
             ConstStr& inputBlob = "data", ConstStr& outputBlob = "prob",
             bool useFP16 = true, bool use_cached_model = true);

-    void forward(const unsigned char* input, size_t w, size_t h, size_t c);
+    void forward(const unsigned char* input, size_t w, size_t h, size_t c, const std::string& encoding);

     int getInWidth() const { return in_dims_.w(); }
     int getInHeight() const { return in_dims_.h(); }

@@ -97,13 +97,14 @@ class TensorNet
     };
     static Profiler s_profiler;

-    nvinfer1::IRuntime* infer_;
-    nvinfer1::ICudaEngine* engine_;
-    nvinfer1::IExecutionContext* context_;
+    nvinfer1::IRuntime* infer_;
+    nvinfer1::ICudaEngine* engine_;
+    nvinfer1::IExecutionContext* context_;

     nvinfer1::DimsCHW in_dims_;
     nvinfer1::DimsCHW out_dims_;

+    // DNN input format.
     InputFormat inp_fmt_ = InputFormat::BGR;

     cv::Mat in_h_;
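The signature change is the heart of the commit: forward() previously assumed rgb8 input, while the new encoding argument lets the caller describe the actual channel order of the ROS image. The InputFormat enum referenced by inp_fmt_ is not part of this diff; a minimal sketch of the two pieces together, assuming the enum has only the two values used in this commit:

// Sketch only: the real InputFormat definition lives elsewhere in tensor_net.h;
// just the two values referenced by this commit (BGR and RGB) are assumed here.
enum class InputFormat
{
    BGR, // the network expects OpenCV-style BGR input
    RGB  // the network expects RGB input
};

// New signature: the ROS image encoding ("rgb8" or "bgr8") travels with the pixels,
// so forward() can decide whether a channel swap is needed before inference.
void forward(const unsigned char* input, size_t w, size_t h, size_t c,
             const std::string& encoding);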

ros/packages/caffe_ros/src/caffe_ros.cpp

Lines changed: 3 additions & 3 deletions

@@ -110,7 +110,7 @@ sensor_msgs::Image::ConstPtr CaffeRos::computeOutputs()
         return nullptr;

     auto img = *cur_img_;
-    net_.forward(img.data.data(), img.width, img.height, 3);
+    net_.forward(img.data.data(), img.width, img.height, 3, img.encoding);
     auto out_msg = boost::make_shared<sensor_msgs::Image>();
     // Set stamp and frame id to the same value as source image so we can synchronize with other nodes if needed.
     out_msg->header.stamp.sec = img.header.stamp.sec;

@@ -184,9 +184,9 @@ void CaffeRos::imageCallback(const sensor_msgs::Image::ConstPtr& msg)
     auto img = *msg;
     //ROS_DEBUG("imageCallback: %u, %u, %s", img.width, img.height, img.encoding.c_str());
     // Only RGB8 is currently supported.
-    if (img.encoding != "rgb8")
+    if (img.encoding != "rgb8" && img.encoding != "bgr8")
     {
-        ROS_FATAL("Image encoding %s is not yet supported. Supported encodings: rgb8", img.encoding.c_str());
+        ROS_FATAL("Image encoding %s is not yet supported. Supported encodings: rgb8, bgr8", img.encoding.c_str());
         ros::shutdown();
     }
     cur_img_ = msg;
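With this change the callback accepts both encodings and forwards img.encoding untouched; anything else still aborts the node. As a hypothetical illustration (no such helper exists in caffe_ros.cpp), the check could be kept in one place so a future encoding only needs to be added once:

#include <string>

// Hypothetical helper, not part of the actual node: the encodings accepted after
// this commit. A new encoding would also need matching conversion logic in
// TensorNet::forward().
static bool isSupportedEncoding(const std::string& encoding)
{
    return encoding == "rgb8" || encoding == "bgr8";
}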

ros/packages/caffe_ros/src/tensor_net.cpp

Lines changed: 25 additions & 14 deletions

@@ -82,8 +82,8 @@ void TensorNet::profileModel(ConstStr& prototxt_path, ConstStr& model_path, bool
     if (has_fast_FP16 && !use_FP16)
         ROS_INFO("... however, the model will be loaded as FP32.");

-    nvinfer1::DataType model_data_type = (has_fast_FP16 && use_FP16) ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT;
-    auto blob_finder = parser->parse(prototxt_path.c_str(), model_path.c_str(), *network, model_data_type);
+    nvinfer1::DataType model_data_type = (has_fast_FP16 && use_FP16) ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT;
+    auto blob_finder = parser->parse(prototxt_path.c_str(), model_path.c_str(), *network, model_data_type);
     if (blob_finder == nullptr)
     {
         ROS_FATAL("Failed to parse network: %s, %s", prototxt_path.c_str(), model_path.c_str());

@@ -102,13 +102,13 @@ void TensorNet::profileModel(ConstStr& prototxt_path, ConstStr& model_path, bool
     // Build model.
     // REVIEW alexeyk: make configurable?
     // Note: FP16 requires batch size to be even, TensorRT will switch automatically when building an engine.
-    builder->setMaxBatchSize(1);
-    builder->setMaxWorkspaceSize(16 * 1024 * 1024);
+    builder->setMaxBatchSize(1);
+    builder->setMaxWorkspaceSize(16 * 1024 * 1024);

     builder->setHalf2Mode(has_fast_FP16 && use_FP16);

     ROS_INFO("Building CUDA engine...");
-    auto engine = builder->buildCudaEngine(*network);
+    auto engine = builder->buildCudaEngine(*network);
     if (engine == nullptr)
     {
         ROS_FATAL("Failed to build CUDA engine.");

@@ -126,8 +126,8 @@ void TensorNet::profileModel(ConstStr& prototxt_path, ConstStr& model_path, bool
     // Cleanup.
     network->destroy();
     parser->destroy();
-    engine->destroy();
-    builder->destroy();
+    engine->destroy();
+    builder->destroy();
 }

 void TensorNet::loadNetwork(ConstStr& prototxt_path, ConstStr& model_path,

@@ -166,14 +166,14 @@ void TensorNet::loadNetwork(ConstStr& prototxt_path, ConstStr& model_path,
     model.seekg(0, model.beg);
     const auto& model_final = model.str();

-    engine_ = infer_->deserializeCudaEngine(model_final.c_str(), model_final.size(), nullptr);
+    engine_ = infer_->deserializeCudaEngine(model_final.c_str(), model_final.size(), nullptr);
     if (engine_ == nullptr)
     {
         ROS_FATAL("Failed to deserialize engine.");
         ros::shutdown();
     }

-    context_ = engine_->createExecutionContext();
+    context_ = engine_->createExecutionContext();
     if (context_ == nullptr)
     {
         ROS_FATAL("Failed to create execution context.");

@@ -182,14 +182,14 @@ void TensorNet::loadNetwork(ConstStr& prototxt_path, ConstStr& model_path,
     ROS_INFO("Created CUDA engine and context.");

     int iinp = engine_->getBindingIndex(input_blob.c_str());
-    in_dims_ = DimsToCHW(engine_->getBindingDimensions(iinp));
+    in_dims_ = DimsToCHW(engine_->getBindingDimensions(iinp));
     ROS_INFO("Input : (W:%4u, H:%4u, C:%4u).", in_dims_.w(), in_dims_.h(), in_dims_.c());
     //cv::gpu::ensureSizeIsEnough(in_dims_.h(), in_dims_.w(), CV_8UC3, in_d_);
     in_d_ = cv::gpu::createContinuous(in_dims_.c(), in_dims_.w() * in_dims_.h(), CV_32FC1);
     assert(in_d_.isContinuous());

     int iout = engine_->getBindingIndex(output_blob.c_str());
-    out_dims_ = DimsToCHW(engine_->getBindingDimensions(iout));
+    out_dims_ = DimsToCHW(engine_->getBindingDimensions(iout));
     ROS_INFO("Output: (W:%4u, H:%4u, C:%4u).", out_dims_.w(), out_dims_.h(), out_dims_.c());

     // Allocate mapped memory for the outputs.

@@ -206,8 +206,9 @@ void TensorNet::loadNetwork(ConstStr& prototxt_path, ConstStr& model_path,
     }
 }

-void TensorNet::forward(const unsigned char* input, size_t w, size_t h, size_t c)
+void TensorNet::forward(const unsigned char* input, size_t w, size_t h, size_t c, const std::string& encoding)
 {
+    ROS_ASSERT(encoding == "rgb8" || encoding == "bgr8");
     ROS_ASSERT(c == (size_t)in_dims_.c());
     //ROS_DEBUG("Forward: input image is (%zu, %zu, %zu), network input is (%u, %u, %u)", w, h, c, in_dims_.w(), in_dims_.h(), in_dims_.c());

@@ -217,9 +218,19 @@ void TensorNet::forward(const unsigned char* input, size_t w, size_t h, size_t c
     ros::Time start = ros::Time::now();

     in_h_ = cv::Mat((int)h, (int)w, CV_8UC3, (void*)input);
-    // Convert image from RGB to BGR format used by OpenCV if needed.
+    // Handle encodings.
     if (inp_fmt_ == InputFormat::BGR)
-        cv::cvtColor(in_h_, in_h_, CV_RGB2BGR);
+    {
+        // Convert image from RGB to BGR format used by OpenCV if needed.
+        if (encoding == "rgb8")
+            cv::cvtColor(in_h_, in_h_, CV_RGB2BGR);
+    }
+    else if (inp_fmt_ == InputFormat::RGB)
+    {
+        // Input image in OpenCV BGR, convert to RGB.
+        if (encoding == "bgr8")
+            cv::cvtColor(in_h_, in_h_, CV_BGR2RGB);
+    }
     //ROS_INFO("Dims: (%zu, %zu) -> (%zu, %zu)", w, h, (size_t)in_dims_.w(), (size_t)in_dims_.h());
     // Convert to floating point type.
     in_h_.convertTo(in_h_, CV_32F);
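The rewritten forward() now keys the color conversion on two things: the channel order the network was trained with (inp_fmt_) and the encoding of the incoming message, and it only calls cvtColor when the two disagree. A self-contained sketch of that decision, using the same OpenCV conversion codes as the diff (the free function and enum here are illustrative, not the actual TensorNet members):

#include <opencv2/opencv.hpp>
#include <string>

// Illustrative sketch of the conversion rule added in this commit: swap channels
// only when the wire encoding disagrees with the channel order the DNN expects.
enum class InputFormat { BGR, RGB };

static void convertToNetworkFormat(cv::Mat& img, const std::string& encoding, InputFormat net_fmt)
{
    if (net_fmt == InputFormat::BGR && encoding == "rgb8")
        cv::cvtColor(img, img, CV_RGB2BGR);   // RGB on the wire, BGR expected by the net
    else if (net_fmt == InputFormat::RGB && encoding == "bgr8")
        cv::cvtColor(img, img, CV_BGR2RGB);   // BGR on the wire, RGB expected by the net
    // Matching encoding and network format: no conversion needed.
}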

ros/packages/caffe_ros/tests/tests.cpp

Lines changed: 67 additions & 7 deletions

@@ -26,15 +26,17 @@ class CaffeRosTestsCallback
     sensor_msgs::Image::ConstPtr dnn_out_;
 };

-static boost::shared_ptr<sensor_msgs::Image> readImage(const std::string& filename)
+static boost::shared_ptr<sensor_msgs::Image> readImage(const std::string& filename, const std::string& encoding = "rgb8")
 {
+    EXPECT_TRUE(encoding == "rgb8" || encoding == "bgr8");
     auto img = cv::imread(filename);
     SCOPED_TRACE(filename);
     EXPECT_TRUE(img.cols > 0 && img.rows > 0);
     // Convert image from BGR format used by OpenCV to RGB.
-    cv::cvtColor(img, img, CV_BGR2RGB);
+    if (encoding == "rgb8")
+        cv::cvtColor(img, img, CV_BGR2RGB);
     auto img_msg = boost::make_shared<sensor_msgs::Image>();
-    img_msg->encoding = "rgb8";
+    img_msg->encoding = encoding;
     img_msg->width = img.cols;
     img_msg->height = img.rows;
     img_msg->step = img_msg->width * img.channels();

@@ -88,9 +90,67 @@ TEST(CaffeRosTests, TrailNetPredictions)
         auto dnn_out = *t.dnn_out_;
         // The output should be 1x1x6 (HxWxC).
         EXPECT_EQ(dnn_out.width, 1);
-        EXPECT_EQ(dnn_out.height, 1);
+        EXPECT_EQ(dnn_out.height, 1);
         // float32, channels == 6.
-        EXPECT_EQ(dnn_out.encoding, "32FC6");
+        EXPECT_EQ(dnn_out.encoding, "32FC6");
+
+        auto data = reinterpret_cast<const float*>(dnn_out.data.data());
+        for (int col = 0; col < 6; col++)
+        {
+            // Must use proper floating point comparison.
+            EXPECT_NEAR(data[col], predictions[i][col], 0.001f) << "Values are not equal at (" << i << ", " << col <<")";
+        }
+    }
+}
+
+TEST(CaffeRosTests, TrailNetPredictionsBGR8)
+{
+    ros::NodeHandle nh("~");
+    std::string test_data_dir;
+    nh.param<std::string>("test_data_dir", test_data_dir, "");
+    ASSERT_TRUE(fs::exists(test_data_dir));
+
+    CaffeRosTestsCallback t;
+    auto dnn_sub = nh.subscribe<sensor_msgs::Image>("/trails_dnn/network/output", 1,
+                                                    &CaffeRosTestsCallback::dnnCallback, &t);
+    const char* camera_topic = "/camera_trails/image_raw";
+    auto img_pub = nh.advertise<sensor_msgs::Image>(camera_topic, 1);
+
+    // Test images and expected predictions.
+    auto images = std::vector<std::string>{"rot_l.jpg", "rot_c.jpg", "rot_r.jpg", "tran_l.jpg", "tran_r.jpg"};
+    float predictions[][6] = {{0.932, 0.060, 0.006, 0.080, 0.848, 0.071},
+                              {0.040, 0.958, 0.001, 0.488, 0.375, 0.135},
+                              {0.000, 0.027, 0.971, 0.036, 0.407, 0.555},
+                              {0.011, 0.988, 0.000, 0.981, 0.008, 0.009},
+                              {0.000, 0.855, 0.144, 0.013, 0.031, 0.954}};
+
+    // When running using rostest, current directory is $HOME/.ros
+    fs::path data_dir{test_data_dir};
+
+    for (size_t i = 0; i < images.size(); i++)
+    {
+        auto img_msg = readImage((data_dir / images[i]).string(), "bgr8");
+        // Use image index as a unique timestamp.
+        img_msg->header.stamp.sec = 0;
+        img_msg->header.stamp.nsec = (int)i;
+
+        ros::Rate rate(1000);
+        // Wait until DNN processes the current messages. There might be multiple messages
+        // in the queue so make sure to select the right one based on current index.
+        while (ros::ok() && (t.dnn_out_ == nullptr || t.dnn_out_->header.stamp.nsec != i))
+        {
+            img_pub.publish(img_msg);
+            ros::spinOnce();
+            rate.sleep();
+        }
+
+        EXPECT_TRUE(t.dnn_out_ != nullptr);
+        auto dnn_out = *t.dnn_out_;
+        // The output should be 1x1x6 (HxWxC).
+        EXPECT_EQ(dnn_out.width, 1);
+        EXPECT_EQ(dnn_out.height, 1);
+        // float32, channels == 6.
+        EXPECT_EQ(dnn_out.encoding, "32FC6");

         auto data = reinterpret_cast<const float*>(dnn_out.data.data());
         for (int col = 0; col < 6; col++)

@@ -146,9 +206,9 @@ TEST(CaffeRosTests, TrailNetPredictionsFP16)
         auto dnn_out = *t.dnn_out_;
         // The output should be 1x1x6 (HxWxC).
         EXPECT_EQ(dnn_out.width, 1);
-        EXPECT_EQ(dnn_out.height, 1);
+        EXPECT_EQ(dnn_out.height, 1);
         // float32, channels == 6.
-        EXPECT_EQ(dnn_out.encoding, "32FC6");
+        EXPECT_EQ(dnn_out.encoding, "32FC6");

         auto data = reinterpret_cast<const float*>(dnn_out.data.data());
         for (int col = 0; col < 6; col++)
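The new TrailNetPredictionsBGR8 test publishes the same images with the bgr8 encoding and checks them against the same prediction table as the rgb8 test: if the channel handling in forward() is right, the wire encoding must not change the network output. A tiny standalone check of the property the test leans on (hypothetical, not part of tests.cpp), using gtest and OpenCV:

#include <gtest/gtest.h>
#include <opencv2/opencv.hpp>

// Hypothetical sanity check: an RGB->BGR swap followed by BGR->RGB restores the
// original pixels, so rgb8 and bgr8 carry the same information and should yield
// identical DNN outputs once the node normalizes the channel order.
TEST(EncodingSanity, RgbBgrRoundTrip)
{
    cv::Mat rgb(4, 4, CV_8UC3);
    cv::randu(rgb, cv::Scalar::all(0), cv::Scalar::all(255));

    cv::Mat bgr, back;
    cv::cvtColor(rgb, bgr, CV_RGB2BGR);
    cv::cvtColor(bgr, back, CV_BGR2RGB);

    EXPECT_EQ(cv::countNonZero(cv::Mat(rgb != back).reshape(1)), 0);
}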

ros/packages/px4_controller/launch/aion_ctl.launch

Lines changed: 2 additions & 2 deletions

@@ -4,8 +4,8 @@
     <arg name="linear_speed" default="2.0" />
     <arg name="linear_speed_scale" default="90" />
     <arg name="turn_angle_scale" default="-250" />
-    <arg name="dnn_turn_angle" default="30.0" />
-    <arg name="dnn_lateralcorr_angle" default="30.0" />
+    <arg name="dnn_turn_angle" default="15.0" />
+    <arg name="dnn_lateralcorr_angle" default="15.0" />
     <arg name="joy_type" default="shield" />
     <arg name="log_output" default="screen" />
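The launch file halves the default dnn_turn_angle and dnn_lateralcorr_angle from 30 to 15 degrees, so the controller steers less aggressively from DNN output alone. The exact mapping lives in px4_controller and is not shown in this commit; as a rough, hypothetical illustration of how two such angle limits could scale TrailNet's six outputs (three orientation probabilities followed by three lateral-offset probabilities, matching the rot_*/tran_* test images):

// Hypothetical steering sketch, NOT the actual px4_controller code:
// probs[0..2] = view orientation (left, center, right),
// probs[3..5] = lateral offset  (left, center, right).
// Each launch parameter caps the contribution of its probability pair, so the
// smaller 15-degree defaults mean gentler corrections.
static float steeringAngleDeg(const float probs[6],
                              float dnn_turn_angle,         // e.g. 15.0 from the launch file
                              float dnn_lateralcorr_angle)  // e.g. 15.0 from the launch file
{
    float turn    = dnn_turn_angle        * (probs[2] - probs[0]);
    float lateral = dnn_lateralcorr_angle * (probs[5] - probs[3]);
    return turn + lateral;
}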

New launch file for the ZED ROS node (file path not shown in this capture)

Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
+<launch>
+    <!--
+        redtail debug .launch file for ZED camera which uses ZED ROS node.
+    -->
+    <arg name="prototxt_path" default="/home/nvidia/redtail/models/pretrained/TrailNet_SResNet-18.prototxt"/>
+    <arg name="model_path" default="/home/nvidia/redtail/models/pretrained/TrailNet_SResNet-18.caffemodel" />
+    <arg name="input_layer" default="data" />
+    <arg name="output_layer" default="out" />
+    <arg name="use_fp16" default="true" />
+
+    <arg name="frame_id" default="/tf_frame" />
+
+    <!-- Start ZED ROS node. -->
+    <include file="$(find zed_wrapper)/launch/zed.launch" />
+
+    <!-- Start the caffe_ros node -->
+    <node pkg="caffe_ros" type="caffe_ros_node" name="trails_dnn" output="screen">
+        <param name="camera_topic" value="/zed/left/image_rect_color" />
+        <param name="prototxt_path" value="$(arg prototxt_path)" />
+        <param name="model_path" value="$(arg model_path)" />
+        <param name="input_layer" value="$(arg input_layer)" />
+        <param name="output_layer" value="$(arg output_layer)" />
+        <param name="use_fp16" value="$(arg use_fp16)" />
+    </node>
+
+    <node pkg="redtail_debug" type="redtail_debug_node" name="redtail_debug" output="screen">
+    </node>
+
+    <node pkg="tf" type="static_transform_publisher" name="tf_publisher" args="0 0 0 3.14 0 0 /map $(arg frame_id) 100"/>
+</launch>
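The new launch file wires the ZED wrapper's rectified left image into caffe_ros through the camera_topic parameter; the ZED node publishes that topic as bgr8, which is exactly what the rest of this commit adds support for. A minimal sketch of the subscribing side, assuming the node reads camera_topic as a private parameter the way the launch file sets it (the node name and callback here are illustrative, not the actual caffe_ros implementation):

#include <ros/ros.h>
#include <sensor_msgs/Image.h>

// Illustrative subscriber only: the real caffe_ros node does this inside its
// CaffeRos class. It reads the camera_topic parameter set by the launch file and
// logs the encoding of whatever arrives (bgr8 for the ZED wrapper).
static void imageCallback(const sensor_msgs::Image::ConstPtr& msg)
{
    ROS_INFO("Got %ux%u image, encoding %s", msg->width, msg->height, msg->encoding.c_str());
}

int main(int argc, char** argv)
{
    ros::init(argc, argv, "camera_topic_example");
    ros::NodeHandle nh("~");

    std::string camera_topic;
    nh.param<std::string>("camera_topic", camera_topic, "/zed/left/image_rect_color");

    ros::Subscriber sub = nh.subscribe(camera_topic, 1, imageCallback);
    ros::spin();
    return 0;
}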

ros/packages/redtail_debug/launch/trailnet_debug.launch renamed to ros/packages/redtail_debug/launch/trailnet_debug_zed_gscam.launch

Lines changed: 5 additions & 1 deletion

@@ -1,4 +1,8 @@
 <launch>
+    <!--
+        redtail debug .launch file for ZED camera which uses gscam node (in case ZED ROS node is not available).
+        Note that gscam will not perform image undistortion despite taking calibration YAML file as an argument.
+    -->
     <arg name="prototxt_path" default="/home/nvidia/redtail/models/pretrained/TrailNet_SResNet-18.prototxt"/>
     <arg name="model_path" default="/home/nvidia/redtail/models/pretrained/TrailNet_SResNet-18.caffemodel" />
     <arg name="input_layer" default="data" />

@@ -37,5 +41,5 @@
     <node pkg="redtail_debug" type="redtail_debug_node" name="redtail_debug" output="screen">
     </node>

-    <node name="tf_publisher" pkg="tf" type="static_transform_publisher" args="0 0 0 3.14 0 0 /map $(arg frame_id) 100"/>
+    <node pkg="tf" type="static_transform_publisher" name="tf_publisher" args="0 0 0 3.14 0 0 /map $(arg frame_id) 100"/>
 </launch>
