Skip to content

Commit 9d37cda

Browse files
authored
Merge pull request opencv#18891 from CowKeyMan:NMS_boxes_with_different_labels
Add option for NMS for boxes with different labels * DetectionModel impl * Add option for NMS for boxes with different labels. In the detect function declared in modules/dnn/include/opencv2/dnn/dnn.hpp, whose implementation can be found at modules/dnn/src/model.cpp, Non-Maximum Suppression (NMS) is applied only to objects with the same label. A flag was therefore added to let developers choose whether to keep the default behaviour or to have NMS applied to all boxes, regardless of label. The flag is called nmsDifferentLabels (exposed as nmsAcrossClasses in the code of this commit) and defaults to false, which keeps the current behaviour, thus allowing existing projects to update OpenCV without disruption. Solves issue opencv#18832 * Change return type of set & Add default constr * Add assertions due to default constructor
1 parent 3f686a6 commit 9d37cda

File tree

3 files changed

+161
-31
lines changed

3 files changed

+161
-31
lines changed

modules/dnn/include/opencv2/dnn/dnn.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,6 +1296,23 @@ CV__DNN_INLINE_NS_BEGIN
12961296
*/
12971297
CV_WRAP DetectionModel(const Net& network);
12981298

1299+
CV_DEPRECATED_EXTERNAL // avoid using in C++ code (need to fix bindings first)
1300+
DetectionModel();
1301+
1302+
/**
1303+
* @brief nmsAcrossClasses defaults to false,
1304+
* such that when non max suppression is used during the detect() function, it will do so per-class.
1305+
* This function allows you to toggle this behaviour.
1306+
* @param[in] value The new value for nmsAcrossClasses
1307+
*/
1308+
CV_WRAP DetectionModel& setNmsAcrossClasses(bool value);
1309+
1310+
/**
1311+
* @brief Getter for nmsAcrossClasses. This variable defaults to false,
1312+
* such that when non max suppression is used during the detect() function, it will do so only per-class
1313+
*/
1314+
CV_WRAP bool getNmsAcrossClasses();
1315+
12991316
/** @brief Given the @p input frame, create input blob, run net and return result detections.
13001317
* @param[in] frame The input image.
13011318
* @param[out] classIds Class indexes in result detection.

modules/dnn/src/model.cpp

Lines changed: 88 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -320,34 +320,78 @@ void SegmentationModel::segment(InputArray frame, OutputArray mask)
320320
}
321321
}
322322

323-
void disableRegionNMS(Net& net)
323+
class DetectionModel_Impl : public Model::Impl
324324
{
325-
for (String& name : net.getUnconnectedOutLayersNames())
325+
public:
326+
virtual ~DetectionModel_Impl() {}
327+
DetectionModel_Impl() : Impl() {}
328+
DetectionModel_Impl(const DetectionModel_Impl&) = delete;
329+
DetectionModel_Impl(DetectionModel_Impl&&) = delete;
330+
331+
void disableRegionNMS(Net& net)
326332
{
327-
int layerId = net.getLayerId(name);
328-
Ptr<RegionLayer> layer = net.getLayer(layerId).dynamicCast<RegionLayer>();
329-
if (!layer.empty())
333+
for (String& name : net.getUnconnectedOutLayersNames())
330334
{
331-
layer->nmsThreshold = 0;
335+
int layerId = net.getLayerId(name);
336+
Ptr<RegionLayer> layer = net.getLayer(layerId).dynamicCast<RegionLayer>();
337+
if (!layer.empty())
338+
{
339+
layer->nmsThreshold = 0;
340+
}
332341
}
333342
}
334-
}
343+
344+
void setNmsAcrossClasses(bool value) {
345+
nmsAcrossClasses = value;
346+
}
347+
348+
bool getNmsAcrossClasses() {
349+
return nmsAcrossClasses;
350+
}
351+
352+
private:
353+
bool nmsAcrossClasses = false;
354+
};
335355

336356
DetectionModel::DetectionModel(const String& model, const String& config)
337-
: Model(model, config)
357+
: DetectionModel(readNet(model, config))
358+
{
359+
// nothing
360+
}
361+
362+
DetectionModel::DetectionModel(const Net& network) : Model()
338363
{
339-
disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet()
364+
impl = makePtr<DetectionModel_Impl>();
365+
impl->initNet(network);
366+
impl.dynamicCast<DetectionModel_Impl>()->disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet()
367+
}
368+
369+
DetectionModel::DetectionModel() : Model()
370+
{
371+
// nothing
372+
}
373+
374+
DetectionModel& DetectionModel::setNmsAcrossClasses(bool value)
375+
{
376+
CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
377+
378+
impl.dynamicCast<DetectionModel_Impl>()->setNmsAcrossClasses(value);
379+
return *this;
340380
}
341381

342-
DetectionModel::DetectionModel(const Net& network) : Model(network)
382+
bool DetectionModel::getNmsAcrossClasses()
343383
{
344-
disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet()
384+
CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
385+
386+
return impl.dynamicCast<DetectionModel_Impl>()->getNmsAcrossClasses();
345387
}
346388

347389
void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
348390
CV_OUT std::vector<float>& confidences, CV_OUT std::vector<Rect>& boxes,
349391
float confThreshold, float nmsThreshold)
350392
{
393+
CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
394+
351395
std::vector<Mat> detections;
352396
impl->processFrame(frame, detections);
353397

@@ -413,7 +457,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
413457
{
414458
std::vector<int> predClassIds;
415459
std::vector<Rect> predBoxes;
416-
std::vector<float> predConf;
460+
std::vector<float> predConfidences;
417461
for (int i = 0; i < detections.size(); ++i)
418462
{
419463
// Network produces output blob with a shape NxC where N is a number of
@@ -442,45 +486,59 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
442486
height = std::max(1, std::min(height, frameHeight - top));
443487

444488
predClassIds.push_back(classIdPoint.x);
445-
predConf.push_back(static_cast<float>(conf));
489+
predConfidences.push_back(static_cast<float>(conf));
446490
predBoxes.emplace_back(left, top, width, height);
447491
}
448492
}
449493

450494
if (nmsThreshold)
451495
{
452-
std::map<int, std::vector<size_t> > class2indices;
453-
for (size_t i = 0; i < predClassIds.size(); i++)
496+
if (getNmsAcrossClasses())
454497
{
455-
if (predConf[i] >= confThreshold)
498+
std::vector<int> indices;
499+
NMSBoxes(predBoxes, predConfidences, confThreshold, nmsThreshold, indices);
500+
for (int idx : indices)
456501
{
457-
class2indices[predClassIds[i]].push_back(i);
502+
boxes.push_back(predBoxes[idx]);
503+
confidences.push_back(predConfidences[idx]);
504+
classIds.push_back(predClassIds[idx]);
458505
}
459506
}
460-
for (const auto& it : class2indices)
507+
else
461508
{
462-
std::vector<Rect> localBoxes;
463-
std::vector<float> localConfidences;
464-
for (size_t idx : it.second)
509+
std::map<int, std::vector<size_t> > class2indices;
510+
for (size_t i = 0; i < predClassIds.size(); i++)
465511
{
466-
localBoxes.push_back(predBoxes[idx]);
467-
localConfidences.push_back(predConf[idx]);
512+
if (predConfidences[i] >= confThreshold)
513+
{
514+
class2indices[predClassIds[i]].push_back(i);
515+
}
468516
}
469-
std::vector<int> indices;
470-
NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices);
471-
classIds.resize(classIds.size() + indices.size(), it.first);
472-
for (int idx : indices)
517+
for (const auto& it : class2indices)
473518
{
474-
boxes.push_back(localBoxes[idx]);
475-
confidences.push_back(localConfidences[idx]);
519+
std::vector<Rect> localBoxes;
520+
std::vector<float> localConfidences;
521+
for (size_t idx : it.second)
522+
{
523+
localBoxes.push_back(predBoxes[idx]);
524+
localConfidences.push_back(predConfidences[idx]);
525+
}
526+
std::vector<int> indices;
527+
NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices);
528+
classIds.resize(classIds.size() + indices.size(), it.first);
529+
for (int idx : indices)
530+
{
531+
boxes.push_back(localBoxes[idx]);
532+
confidences.push_back(localConfidences[idx]);
533+
}
476534
}
477535
}
478536
}
479537
else
480538
{
481539
boxes = std::move(predBoxes);
482540
classIds = std::move(predClassIds);
483-
confidences = std::move(predConf);
541+
confidences = std::move(predConfidences);
484542
}
485543
}
486544
else

modules/dnn/test/test_model.cpp

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ class Test_Model : public DNNTestLayer
2525
double scoreDiff, double iouDiff,
2626
double confThreshold = 0.24, double nmsThreshold = 0.0,
2727
const Size& size = {-1, -1}, Scalar mean = Scalar(),
28-
double scale = 1.0, bool swapRB = false, bool crop = false)
28+
double scale = 1.0, bool swapRB = false, bool crop = false,
29+
bool nmsAcrossClasses = false)
2930
{
3031
checkBackend();
3132

@@ -38,6 +39,8 @@ class Test_Model : public DNNTestLayer
3839
model.setPreferableBackend(backend);
3940
model.setPreferableTarget(target);
4041

42+
model.setNmsAcrossClasses(nmsAcrossClasses);
43+
4144
std::vector<int> classIds;
4245
std::vector<float> confidences;
4346
std::vector<Rect> boxes;
@@ -177,6 +180,58 @@ TEST_P(Test_Model, DetectRegion)
177180
Scalar(), scale, swapRB);
178181
}
179182

183+
TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses)
184+
{
185+
applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_MEMORY_1GB);
186+
187+
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
188+
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
189+
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
190+
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
191+
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
192+
#endif
193+
194+
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
195+
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
196+
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
197+
#endif
198+
199+
#if defined(INF_ENGINE_RELEASE)
200+
if (target == DNN_TARGET_MYRIAD
201+
&& getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
202+
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
203+
#endif
204+
205+
std::vector<int> refClassIds = { 6, 11 };
206+
std::vector<float> refConfidences = { 0.750469f, 0.901615f };
207+
std::vector<Rect2d> refBoxes = { Rect2d(240, 53, 135, 72),
208+
Rect2d(58, 141, 117, 249) };
209+
210+
std::string img_path = _tf("dog416.png");
211+
std::string weights_file = _tf("yolo-voc.weights", false);
212+
std::string config_file = _tf("yolo-voc.cfg");
213+
214+
double scale = 1.0 / 255.0;
215+
Size size{ 416, 416 };
216+
bool swapRB = true;
217+
bool crop = false;
218+
bool nmsAcrossClasses = true;
219+
220+
double confThreshold = 0.24;
221+
double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.15: 0.15;
222+
double scoreDiff = 8e-5, iouDiff = 1e-5;
223+
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16)
224+
{
225+
scoreDiff = 1e-2;
226+
iouDiff = 1.6e-2;
227+
}
228+
229+
testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
230+
refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
231+
Scalar(), scale, swapRB, crop,
232+
nmsAcrossClasses);
233+
}
234+
180235
TEST_P(Test_Model, DetectionOutput)
181236
{
182237
#if defined(INF_ENGINE_RELEASE)

0 commit comments

Comments
 (0)