Skip to content

[GSoC] Loop closure detection. #3002

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: 5.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/rgbd/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
set(the_description "RGBD algorithms")

ocv_define_module(rgbd opencv_core opencv_3d opencv_imgproc OPTIONAL opencv_viz WRAP python)
ocv_define_module(rgbd opencv_core opencv_3d opencv_imgproc opencv_dnn OPTIONAL opencv_features2d opencv_viz WRAP python)
32 changes: 31 additions & 1 deletion modules/rgbd/include/opencv2/rgbd/large_kinfu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,37 @@ class CV_EXPORTS_W LargeKinfu

virtual const Affine3f getPose() const = 0;

CV_WRAP virtual bool update(InputArray depth) = 0;
CV_WRAP virtual bool update(InputArray depth, InputArray img = noArray()) = 0;

// Set parameters for the loop closure detection function.
CV_WRAP virtual void setModelForLCD(const String& modelBin, const String& modelTxt, const Size& input_size, int backendId = 0, int targetId = 0) = 0;
};


/** @brief Loop Closing Detection implementation

This class implements a Loop Closing Detection of 3d reconstruction algorithm for
larger environments using Spatially hashed TSDF volume "Submaps".

It takes a sequence of RGB images and processes each image with HF-Net.
According to the similarity of the features extracted by HF-Net, it determines whether a loop is present.
Original HF-Net was provided by: https://github.com/ethz-asl/hfnet.
Pre-trained model can be found at: https://1drv.ms/u/s!ApQBoiZSe8Evgolqw23hI8D7lP9mKw?e=JKwPHe.

*/
class CV_EXPORTS_W LoopClosureDetection {
public:

// Creates a loop closure detector backed by an HF-Net DNN model.
// @param modelBin   path to the trained network weights (.bin file).
// @param modelTxt   path to the network definition file.
// @param input_size size input images are resized to before inference.
// @param backendId  cv::dnn computation backend id (0 = choose automatically).
// @param targetId   cv::dnn target device id (0 = CPU).
CV_WRAP static Ptr<LoopClosureDetection> create(const String& modelBin, const String& modelTxt, const Size& input_size, int backendId = 0, int targetId = 0);

virtual ~LoopClosureDetection() = default;

// Adding Frame.
// If there is a loop, the function will return TRUE and tarSubmapID will be set
// to the target submap ID; otherwise it returns FALSE.
CV_WRAP virtual bool addFrame(InputArray img, const int frameID, const int submapID, CV_OUT int& tarSubmapID) = 0;

// Stop running loop closing detection and discard accumulated state.
CV_WRAP virtual void reset() = 0;
};

} // namespace large_kinfu
Expand Down
312 changes: 312 additions & 0 deletions modules/rgbd/samples/large_kinfu_LCD_demo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html

// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this
// module's directory

#include <fstream>
#include <iostream>
#include <opencv2/3d.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/rgbd/large_kinfu.hpp>

#include "io_utils.hpp"

using namespace cv;
using namespace cv::kinfu;
using namespace cv::large_kinfu;
using namespace cv::io_utils;

#ifdef HAVE_OPENCV_VIZ
#include <opencv2/viz.hpp>
#endif

#ifdef HAVE_OPENCV_VIZ
const std::string vizWindowName = "cloud";

// Carries a reference to the running LargeKinfu instance into the viz mouse
// callback, which only receives a void* user-data pointer.
struct PauseCallbackArgs
{
PauseCallbackArgs(LargeKinfu& _largeKinfu) : largeKinfu(_largeKinfu) {}

// Non-owning reference; the LargeKinfu object outlives the pause loop.
LargeKinfu& largeKinfu;
};

void pauseCallback(const viz::MouseEvent& me, void* args);
// Mouse callback used while reconstruction is paused: whenever the viz camera
// is moved (drag or scroll), re-render the scene from the new viewer pose.
void pauseCallback(const viz::MouseEvent& me, void* args)
{
    const bool cameraMoved = (me.type == viz::MouseEvent::Type::MouseMove) ||
                             (me.type == viz::MouseEvent::Type::MouseScrollDown) ||
                             (me.type == viz::MouseEvent::Type::MouseScrollUp);
    if (!cameraMoved)
        return;

    PauseCallbackArgs callbackState = *((PauseCallbackArgs*)(args));
    viz::Viz3d vizWindow(vizWindowName);
    UMat renderedFrame;
    callbackState.largeKinfu.render(renderedFrame, vizWindow.getViewerPose().matrix);
    imshow("render", renderedFrame);
    waitKey(1);
}
#endif

// Command-line option specification for cv::CommandLineParser.
// Fixed grammar in the user-visible help strings and normalized the "target"
// key (previously written "{ target" with a stray leading space).
static const char* keys = {
    "{help h usage ? | | print this message }"
    "{depth | | Path to folder with depth.txt and rgb.txt files listing a set of depth and rgb images. }"
    "{camera |0| Index of depth camera to be used as a depth source }"
    "{coarse | | Run on coarse settings (fast but ugly) or on default (slow but looks better),"
    " in coarse mode points and normals are displayed }"
    "{idle | | Do not run LargeKinfu, just display depth frames }"
    "{record | | Write depth frames to specified file list (the same format as for the 'depth' key) }"
    "{modelBin | | Path to a binary .bin file containing the trained network, which can be downloaded at URL=https://1drv.ms/u/s!ApQBoiZSe8Evgolqw23hI8D7lP9mKw?e=ywHAc5}"
    "{modelTxt | | Path to a .xml file containing the model definition of the trained network.}"
    "{width | 640 | Preprocess input image by resizing to a specific width. }"
    "{height | 480 | Preprocess input image by resizing to a specific height. }"
    "{backend | 2 | At current stage only openvino available, and other backend will be supported soon."
    " Choose one of computation backends: "
    "0: automatically (by default), "
    "1: Halide language (http://halide-lang.org/), "
    "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
    "3: OpenCV implementation }"
    "{target | 0 | Choose one of target computation devices: "
    "0: CPU target (by default), "
    "1: OpenCL, "
    "2: OpenCL fp16 (half-float precision), "
    "3: VPU }"
};

// About-text shown by CommandLineParser::about(); describes the demo, the
// dataset it expects, and the required OpenVINO model.
// Fixed user-visible typo: "downdload" -> "downloaded".
static const std::string message =
    "\nThis demo uses live depth input or RGB-D dataset taken from"
    "\nhttps://vision.in.tum.de/data/datasets/rgbd-dataset"
    "\nto demonstrate Submap based large environment reconstruction"
    "\nThis module uses the newer hashtable based TSDFVolume (relatively fast) for larger "
    "reconstructions by default\n"
    "\n The used OpenVINO DNN model can be downloaded at URL=https://1drv.ms/u/s!ApQBoiZSe8Evgolqw23hI8D7lP9mKw?e=ywHAc5.\n"
    "\n Make sure that OpenVINO DNN backend is available.\n";

int main(int argc, char** argv)
{
bool coarse = false;
bool idle = false;
std::string recordPath, modelBin, modelTxt;
int backend = 0, target = 0, width = 640, height = 480;

CommandLineParser parser(argc, argv, keys);
parser.about(message);

if (!parser.check())
{
parser.printMessage();
parser.printErrors();
return -1;
}

if (parser.has("help"))
{
parser.printMessage();
return 0;
}
if (parser.has("coarse"))
{
coarse = true;
}
if (parser.has("record"))
{
recordPath = parser.get<String>("record");
}
if (parser.has("idle"))
{
idle = true;
}
if (parser.has("modelBin"))
{
modelBin = parser.get<String>("modelBin");
}
if (parser.has("modelTxt"))
{
modelTxt = parser.get<String>("modelTxt");
}
if (parser.has("width"))
{
width = parser.get<int>("width");
}
if (parser.has("height"))
{
height = parser.get<int>("height");
}
if (parser.has("backend"))
{
backend = parser.get<int>("backend");
}
if (parser.has("target"))
{
target = parser.get<int>("target");
}

Ptr<DepthSource> ds;
Ptr<RGBSource> rgbs;

if (parser.has("depth"))
ds = makePtr<DepthSource>(parser.get<String>("depth") + "/depth.txt");
else
ds = makePtr<DepthSource>(parser.get<int>("camera"));

//TODO: intrinsics for camera
rgbs = makePtr<RGBSource>(parser.get<String>("depth") + "/rgb.txt");

if (ds->empty())
{
std::cerr << "Failed to open depth source" << std::endl;
parser.printMessage();
return -1;
}
Size inputSize(width, height);
Ptr<DepthWriter> depthWriter;
if (!recordPath.empty())
depthWriter = makePtr<DepthWriter>(recordPath);

Ptr<large_kinfu::Params> params;
Ptr<LargeKinfu> largeKinfu;

params = large_kinfu::Params::hashTSDFParams(coarse);

// These params can be different for each depth sensor
ds->updateParams(*params);

cv::setUseOptimized(true);

if (!idle)
largeKinfu = LargeKinfu::create(params);

const auto& volParams = largeKinfu->getParams().volumeParams;

if (!modelBin.empty())
largeKinfu->setModelForLCD(modelBin, modelTxt, inputSize, backend, target);

#ifdef HAVE_OPENCV_VIZ
cv::viz::Viz3d window(vizWindowName);
window.setViewerPose(Affine3f::Identity());
bool pause = false;
#endif

UMat rendered;
UMat points;
UMat normals;

int64 prevTime = getTickCount();

for (UMat frame = ds->getDepth(); !frame.empty(); frame = ds->getDepth())
{
if (depthWriter)
depthWriter->append(frame);

Vec3i volResolution(volParams.resolutionX,
volParams.resolutionY,
volParams.resolutionZ);
Affine3f volPose(Matx44f(volParams.pose));

UMat rgb_frame = rgbs->getRGB();
#ifdef HAVE_OPENCV_VIZ
if (pause)
{
// doesn't happen in idle mode
largeKinfu->getCloud(points, normals);
if (!points.empty() && !normals.empty())
{
viz::WCloud cloudWidget(points, viz::Color::white());
viz::WCloudNormals cloudNormals(points, normals, /*level*/ 1, /*scale*/ 0.05,
viz::Color::gray());
window.showWidget("cloud", cloudWidget);
window.showWidget("normals", cloudNormals);

Vec3d volSize = volParams.voxelSize * Vec3d(volResolution);
window.showWidget("cube", viz::WCube(Vec3d::all(0), volSize), volPose);
PauseCallbackArgs pca(*largeKinfu);
window.registerMouseCallback(pauseCallback, (void*)&pca);
window.showWidget("text",
viz::WText(cv::String("Move camera in this window. "
"Close the window or press Q to resume"),
Point()));
window.spin();
window.removeWidget("text");
window.removeWidget("cloud");
window.removeWidget("normals");
window.registerMouseCallback(0);
}

pause = false;
}
else
#endif
{
UMat cvt8;
float depthFactor = params->depthFactor;
convertScaleAbs(frame, cvt8, 0.25 * 256. / depthFactor);
if (!idle)
{
imshow("depth", cvt8);

if (!largeKinfu->update(frame, rgb_frame))
{
largeKinfu->reset();
std::cout << "reset" << std::endl;
}
#ifdef HAVE_OPENCV_VIZ
else
{
if (coarse)
{
largeKinfu->getCloud(points, normals);
if (!points.empty() && !normals.empty())
{
viz::WCloud cloudWidget(points, viz::Color::white());
viz::WCloudNormals cloudNormals(points, normals, /*level*/ 1,
/*scale*/ 0.05, viz::Color::gray());
window.showWidget("cloud", cloudWidget);
window.showWidget("normals", cloudNormals);
}
}

// window.showWidget("worldAxes", viz::WCoordinateSystem());
Vec3d volSize = volParams.voxelSize * volResolution;
window.showWidget("cube", viz::WCube(Vec3d::all(0), volSize), volPose);
window.setViewerPose(largeKinfu->getPose());
window.spinOnce(1, true);
}
#endif

largeKinfu->render(rendered);
}
else
{
rendered = cvt8;
}
}

int64 newTime = getTickCount();
putText(rendered,
cv::format("FPS: %2d press R to reset, P to pause, Q to quit",
(int)(getTickFrequency() / (newTime - prevTime))),
Point(0, rendered.rows - 1), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 255));
prevTime = newTime;
imshow("render", rendered);

int c = waitKey(1);
switch (c)
{
case 'r':
if (!idle)
largeKinfu->reset();
break;
case 'q': return 0;
#ifdef HAVE_OPENCV_VIZ
case 'p':
if (!idle)
pause = true;
#endif
default: break;
}
}

return 0;
}
Loading