Skip to content

Commit 52ca398

Browse files
committed
text: improve DL-based samples
1 parent 27961cd commit 52ca398

File tree

4 files changed

+72
-58
lines changed

4 files changed

+72
-58
lines changed

modules/text/include/opencv2/text/textDetector.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,11 @@ class CV_EXPORTS_W TextDetectorCNN : public TextDetector
5454
5555
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
5656
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
57-
@param detectMultiscale if true, multiple scales of the input image will be used as network input
57+
@param detectionSizes a list of sizes for multiscale detection. The values `[(300,300),(700,500),(700,300),(700,700),(1600,1600)]` are
58+
recommended in @cite LiaoSBWL17 to achieve the best quality.
5859
*/
59-
CV_WRAP static Ptr<TextDetectorCNN> create(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale = false);
60+
CV_WRAP static Ptr<TextDetectorCNN> create(const String& modelArchFilename, const String& modelWeightsFilename,
61+
std::vector<Size> detectionSizes = std::vector<Size>(1, Size(300, 300)));
6062
};
6163

6264
//! @}

modules/text/samples/text_recognition_cnn.cpp

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <opencv2/text.hpp>
22
#include <opencv2/highgui.hpp>
33
#include <opencv2/imgproc.hpp>
4+
#include <opencv2/dnn.hpp>
45

56
#include <iostream>
67
#include <fstream>
@@ -29,22 +30,27 @@ bool fileExists (const string& filename)
2930
return f.good();
3031
}
3132

32-
void textbox_draw(Mat src, vector<Rect>& groups, vector<float>& probs, float thres)
33+
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, std::vector<int>& indexes)
3334
{
34-
for (size_t i = 0; i < groups.size(); i++)
35+
for (size_t i = 0; i < indexes.size(); i++)
3536
{
36-
if(probs[i] > thres)
37+
if (src.type() == CV_8UC3)
3738
{
38-
if (src.type() == CV_8UC3)
39-
{
40-
rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
41-
String label = format("%.2f", probs[i]);
42-
cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
43-
putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
44-
}
45-
else
46-
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
39+
Rect currrentBox = groups[indexes[i]];
40+
rectangle(src, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA);
41+
String label = format("%.2f", probs[indexes[i]]);
42+
std::cout << "text box: " << currrentBox << " confidence: " << probs[indexes[i]] << "\n";
43+
44+
int baseLine = 0;
45+
Size labelSize = getTextSize(label, FONT_HERSHEY_PLAIN, 1, 1, &baseLine);
46+
int yLeftBottom = std::max(currrentBox.y, labelSize.height);
47+
rectangle(src, Point(currrentBox.x, yLeftBottom - labelSize.height),
48+
Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED);
49+
50+
putText(src, label, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA);
4751
}
52+
else
53+
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
4854
}
4955
}
5056

@@ -73,33 +79,41 @@ int main(int argc, const char * argv[])
7379

7480
cout << "Starting Text Box Demo" << endl;
7581
Ptr<text::TextDetectorCNN> textSpotter =
76-
text::TextDetectorCNN::create(modelArch, moddelWeights, false);
82+
text::TextDetectorCNN::create(modelArch, moddelWeights);
7783

7884
vector<Rect> bbox;
7985
vector<float> outProbabillities;
8086
textSpotter->detect(image, bbox, outProbabillities);
87+
std::vector<int> indexes;
88+
cv::dnn::NMSBoxes(bbox, outProbabillities, 0.4f, 0.5f, indexes);
8189

82-
float prob_threshold = 0.6f;
8390
Mat image_copy = image.clone();
84-
textbox_draw(image_copy, bbox, outProbabillities, prob_threshold);
91+
textbox_draw(image_copy, bbox, outProbabillities, indexes);
8592
imshow("Text detection", image_copy);
8693
image_copy = image.clone();
8794

8895
Ptr<text::OCRHolisticWordRecognizer> wordSpotter =
8996
text::OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt");
9097

91-
for(size_t i = 0; i < bbox.size(); i++)
98+
for(size_t i = 0; i < indexes.size(); i++)
9299
{
93-
if(outProbabillities[i] > prob_threshold)
94-
{
95-
Mat wordImg;
96-
cvtColor(image(bbox[i]), wordImg, COLOR_BGR2GRAY);
97-
string word;
98-
vector<float> confs;
99-
wordSpotter->run(wordImg, word, NULL, NULL, &confs);
100-
rectangle(image_copy, bbox[i], Scalar(0, 255, 255), 1, LINE_AA);
101-
putText(image_copy, word, bbox[i].tl(), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 1, LINE_AA);
102-
}
100+
Mat wordImg;
101+
cvtColor(image(bbox[indexes[i]]), wordImg, COLOR_BGR2GRAY);
102+
string word;
103+
vector<float> confs;
104+
wordSpotter->run(wordImg, word, NULL, NULL, &confs);
105+
106+
Rect currrentBox = bbox[indexes[i]];
107+
rectangle(image_copy, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA);
108+
109+
int baseLine = 0;
110+
Size labelSize = getTextSize(word, FONT_HERSHEY_PLAIN, 1, 1, &baseLine);
111+
int yLeftBottom = std::max(currrentBox.y, labelSize.height);
112+
rectangle(image_copy, Point(currrentBox.x, yLeftBottom - labelSize.height),
113+
Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED);
114+
115+
putText(image_copy, word, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA);
116+
103117
}
104118
imshow("Text recognition", image_copy);
105119
cout << "Recognition finished. Press any key to exit.\n";

modules/text/samples/textbox_demo.cpp

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <opencv2/text.hpp>
22
#include <opencv2/highgui.hpp>
33
#include <opencv2/imgproc.hpp>
4+
#include <opencv2/dnn.hpp>
45

56
#include <sstream>
67
#include <iostream>
@@ -27,22 +28,27 @@ bool fileExists (const std::string& filename)
2728
return f.good();
2829
}
2930

30-
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, float thres)
31+
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, std::vector<int>& indexes)
3132
{
32-
for (size_t i = 0; i < groups.size(); i++)
33+
for (size_t i = 0; i < indexes.size(); i++)
3334
{
34-
if(probs[i] > thres)
35+
if (src.type() == CV_8UC3)
3536
{
36-
if (src.type() == CV_8UC3)
37-
{
38-
rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
39-
String label = format("%.2f", probs[i]);
40-
std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
41-
putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
42-
}
43-
else
44-
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
37+
Rect currrentBox = groups[indexes[i]];
38+
rectangle(src, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA);
39+
String label = format("%.2f", probs[indexes[i]]);
40+
std::cout << "text box: " << currrentBox << " confidence: " << probs[indexes[i]] << "\n";
41+
42+
int baseLine = 0;
43+
Size labelSize = getTextSize(label, FONT_HERSHEY_PLAIN, 1, 1, &baseLine);
44+
int yLeftBottom = std::max(currrentBox.y, labelSize.height);
45+
rectangle(src, Point(currrentBox.x, yLeftBottom - labelSize.height),
46+
Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED);
47+
48+
putText(src, label, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA);
4549
}
50+
else
51+
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
4652
}
4753
}
4854

@@ -62,7 +68,7 @@ int main(int argc, const char * argv[])
6268

6369
if (!fileExists(modelArch) || !fileExists(moddelWeights))
6470
{
65-
std::cout<<getHelpStr(argv[0]);
71+
std::cout << getHelpStr(argv[0]);
6672
std::cout << "Model files not found in the current directory. Aborting!" << std::endl;
6773
exit(1);
6874
}
@@ -71,13 +77,16 @@ int main(int argc, const char * argv[])
7177

7278
std::cout << "Starting Text Box Demo" << std::endl;
7379
Ptr<text::TextDetectorCNN> textSpotter =
74-
text::TextDetectorCNN::create(modelArch, moddelWeights, false);
80+
text::TextDetectorCNN::create(modelArch, moddelWeights);
7581

7682
std::vector<Rect> bbox;
7783
std::vector<float> outProbabillities;
7884
textSpotter->detect(image, bbox, outProbabillities);
7985

80-
textbox_draw(image, bbox, outProbabillities, 0.5f);
86+
std::vector<int> indexes;
87+
cv::dnn::NMSBoxes(bbox, outProbabillities, 0.3f, 0.4f, indexes);
88+
89+
textbox_draw(image, bbox, outProbabillities, indexes);
8190

8291
imshow("TextBox Demo",image);
8392
std::cout << "Done!" << std::endl << std::endl;

modules/text/src/text_detectorCNN.cpp

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ class TextDetectorCNNImpl : public TextDetectorCNN
2323
Net net_;
2424
std::vector<Size> sizes_;
2525
int inputChannelCount_;
26-
bool detectMultiscale_;
27-
2826

2927
void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,
3028
std::vector<Rect>& Bbox, std::vector<float>& confidence, Size inputShape)
@@ -54,21 +52,12 @@ class TextDetectorCNNImpl : public TextDetectorCNN
5452
}
5553

5654
public:
57-
TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale) :
58-
detectMultiscale_(detectMultiscale)
55+
TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, std::vector<Size> detectionSizes) :
56+
sizes_(detectionSizes)
5957
{
6058
net_ = readNetFromCaffe(modelArchFilename, modelWeightsFilename);
6159
CV_Assert(!net_.empty());
6260
inputChannelCount_ = 3;
63-
sizes_.push_back(Size(700, 700));
64-
65-
if(detectMultiscale_)
66-
{
67-
sizes_.push_back(Size(300, 300));
68-
sizes_.push_back(Size(700,500));
69-
sizes_.push_back(Size(700,300));
70-
sizes_.push_back(Size(1600,1600));
71-
}
7261
}
7362

7463
void detect(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence)
@@ -92,9 +81,9 @@ class TextDetectorCNNImpl : public TextDetectorCNN
9281
}
9382
};
9483

95-
Ptr<TextDetectorCNN> TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, bool detectMultiscale)
84+
Ptr<TextDetectorCNN> TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, std::vector<Size> detectionSizes)
9685
{
97-
return makePtr<TextDetectorCNNImpl>(modelArchFilename, modelWeightsFilename, detectMultiscale);
86+
return makePtr<TextDetectorCNNImpl>(modelArchFilename, modelWeightsFilename, detectionSizes);
9887
}
9988
} //namespace text
10089
} //namespace cv

0 commit comments

Comments
 (0)