|
1 | 1 | #include <opencv2/text.hpp>
|
2 | 2 | #include <opencv2/highgui.hpp>
|
3 | 3 | #include <opencv2/imgproc.hpp>
|
| 4 | +#include <opencv2/dnn.hpp> |
4 | 5 |
|
5 | 6 | #include <iostream>
|
6 | 7 | #include <fstream>
|
@@ -29,22 +30,27 @@ bool fileExists (const string& filename)
|
29 | 30 | return f.good();
|
30 | 31 | }
|
31 | 32 |
|
32 |
| -void textbox_draw(Mat src, vector<Rect>& groups, vector<float>& probs, float thres) |
| 33 | +void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, std::vector<int>& indexes) |
33 | 34 | {
|
34 |
| - for (size_t i = 0; i < groups.size(); i++) |
| 35 | + for (size_t i = 0; i < indexes.size(); i++) |
35 | 36 | {
|
36 |
| - if(probs[i] > thres) |
| 37 | + if (src.type() == CV_8UC3) |
37 | 38 | {
|
38 |
| - if (src.type() == CV_8UC3) |
39 |
| - { |
40 |
| - rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA); |
41 |
| - String label = format("%.2f", probs[i]); |
42 |
| - cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n"; |
43 |
| - putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA); |
44 |
| - } |
45 |
| - else |
46 |
| - rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); |
| 39 | + Rect currrentBox = groups[indexes[i]]; |
| 40 | + rectangle(src, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA); |
| 41 | + String label = format("%.2f", probs[indexes[i]]); |
| 42 | + std::cout << "text box: " << currrentBox << " confidence: " << probs[indexes[i]] << "\n"; |
| 43 | + |
| 44 | + int baseLine = 0; |
| 45 | + Size labelSize = getTextSize(label, FONT_HERSHEY_PLAIN, 1, 1, &baseLine); |
| 46 | + int yLeftBottom = std::max(currrentBox.y, labelSize.height); |
| 47 | + rectangle(src, Point(currrentBox.x, yLeftBottom - labelSize.height), |
| 48 | + Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED); |
| 49 | + |
| 50 | + putText(src, label, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA); |
47 | 51 | }
|
| 52 | + else |
| 53 | + rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); |
48 | 54 | }
|
49 | 55 | }
|
50 | 56 |
|
@@ -73,33 +79,41 @@ int main(int argc, const char * argv[])
|
73 | 79 |
|
74 | 80 | cout << "Starting Text Box Demo" << endl;
|
75 | 81 | Ptr<text::TextDetectorCNN> textSpotter =
|
76 |
| - text::TextDetectorCNN::create(modelArch, moddelWeights, false); |
| 82 | + text::TextDetectorCNN::create(modelArch, moddelWeights); |
77 | 83 |
|
78 | 84 | vector<Rect> bbox;
|
79 | 85 | vector<float> outProbabillities;
|
80 | 86 | textSpotter->detect(image, bbox, outProbabillities);
|
| 87 | + std::vector<int> indexes; |
| 88 | + cv::dnn::NMSBoxes(bbox, outProbabillities, 0.4f, 0.5f, indexes); |
81 | 89 |
|
82 |
| - float prob_threshold = 0.6f; |
83 | 90 | Mat image_copy = image.clone();
|
84 |
| - textbox_draw(image_copy, bbox, outProbabillities, prob_threshold); |
| 91 | + textbox_draw(image_copy, bbox, outProbabillities, indexes); |
85 | 92 | imshow("Text detection", image_copy);
|
86 | 93 | image_copy = image.clone();
|
87 | 94 |
|
88 | 95 | Ptr<text::OCRHolisticWordRecognizer> wordSpotter =
|
89 | 96 | text::OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt");
|
90 | 97 |
|
91 |
| - for(size_t i = 0; i < bbox.size(); i++) |
| 98 | + for(size_t i = 0; i < indexes.size(); i++) |
92 | 99 | {
|
93 |
| - if(outProbabillities[i] > prob_threshold) |
94 |
| - { |
95 |
| - Mat wordImg; |
96 |
| - cvtColor(image(bbox[i]), wordImg, COLOR_BGR2GRAY); |
97 |
| - string word; |
98 |
| - vector<float> confs; |
99 |
| - wordSpotter->run(wordImg, word, NULL, NULL, &confs); |
100 |
| - rectangle(image_copy, bbox[i], Scalar(0, 255, 255), 1, LINE_AA); |
101 |
| - putText(image_copy, word, bbox[i].tl(), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 1, LINE_AA); |
102 |
| - } |
| 100 | + Mat wordImg; |
| 101 | + cvtColor(image(bbox[indexes[i]]), wordImg, COLOR_BGR2GRAY); |
| 102 | + string word; |
| 103 | + vector<float> confs; |
| 104 | + wordSpotter->run(wordImg, word, NULL, NULL, &confs); |
| 105 | + |
| 106 | + Rect currrentBox = bbox[indexes[i]]; |
| 107 | + rectangle(image_copy, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA); |
| 108 | + |
| 109 | + int baseLine = 0; |
| 110 | + Size labelSize = getTextSize(word, FONT_HERSHEY_PLAIN, 1, 1, &baseLine); |
| 111 | + int yLeftBottom = std::max(currrentBox.y, labelSize.height); |
| 112 | + rectangle(image_copy, Point(currrentBox.x, yLeftBottom - labelSize.height), |
| 113 | + Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED); |
| 114 | + |
| 115 | + putText(image_copy, word, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA); |
| 116 | + |
103 | 117 | }
|
104 | 118 | imshow("Text recognition", image_copy);
|
105 | 119 | cout << "Recognition finished. Press any key to exit.\n";
|
|
0 commit comments