Skip to content

Commit 7031316

Browse files
committed
text: add text recognition sample
1 parent 9195d2e commit 7031316

File tree

1 file changed

+109
-0
lines changed

1 file changed

+109
-0
lines changed
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#include <opencv2/text.hpp>
2+
#include <opencv2/highgui.hpp>
3+
#include <opencv2/imgproc.hpp>
4+
5+
#include <iostream>
6+
#include <fstream>
7+
8+
using namespace cv;
9+
using namespace std;
10+
11+
namespace
12+
{
13+
/**
 * @brief Prints the demo description, the usage line and model download hints to stdout.
 *
 * The usage line advertises a single <input_image> argument because that is
 * the only argument main() consumes (it reads the image path from argv[1]).
 *
 * @param progFname Program name shown in the usage line (typically argv[0]).
 */
void printHelpStr(const std::string& progFname)
{
    // One stream expression with '\n' separators; the trailing std::endl
    // flushes once at the end instead of once per line.
    std::cout << " Demo of text recognition CNN for text detection.\n"
              << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015\n\n"
              << " Usage: " << progFname << " <input_image>\n"
              << " Caffe Model files (textbox.prototxt, TextBoxes_icdar13.caffemodel)\n"
              << " must be in the current directory. See the documentation of text::TextDetectorCNN class to get download links.\n"
              << " Obtaining text recognition Caffe Model files in linux shell:\n"
              << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel\n"
              << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt\n"
              << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt\n"
              << std::endl;
}
25+
26+
/**
 * @brief Reports whether a file can be opened for reading.
 *
 * @param filename Path to probe.
 * @return true when an input stream opened on the path is in a good state,
 *         false otherwise.
 */
bool fileExists (const std::string& filename)
{
    // The temporary stream is closed again as soon as the expression ends.
    return std::ifstream(filename.c_str()).good();
}
31+
32+
void textbox_draw(Mat src, vector<Rect>& groups, vector<float>& probs, float thres)
33+
{
34+
for (size_t i = 0; i < groups.size(); i++)
35+
{
36+
if(probs[i] > thres)
37+
{
38+
if (src.type() == CV_8UC3)
39+
{
40+
rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
41+
String label = format("%.2f", probs[i]);
42+
cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
43+
putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
44+
}
45+
else
46+
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
47+
}
48+
}
49+
}
50+
51+
}
52+
53+
int main(int argc, const char * argv[])
54+
{
55+
if (argc < 2)
56+
{
57+
printHelpStr(argv[0]);
58+
cout << "Insufiecient parameters. Aborting!" << endl;
59+
exit(1);
60+
}
61+
62+
const string modelArch = "textbox.prototxt";
63+
const string moddelWeights = "TextBoxes_icdar13.caffemodel";
64+
65+
if (!fileExists(modelArch) || !fileExists(moddelWeights))
66+
{
67+
printHelpStr(argv[0]);
68+
cout << "Model files not found in the current directory. Aborting!" << endl;
69+
exit(1);
70+
}
71+
72+
Mat image = imread(String(argv[1]), IMREAD_COLOR);
73+
74+
cout << "Starting Text Box Demo" << endl;
75+
Ptr<text::TextDetectorCNN> textSpotter =
76+
text::TextDetectorCNN::create(modelArch, moddelWeights, false);
77+
78+
vector<Rect> bbox;
79+
vector<float> outProbabillities;
80+
textSpotter->detect(image, bbox, outProbabillities);
81+
82+
float prob_threshold = 0.6f;
83+
Mat image_copy = image.clone();
84+
textbox_draw(image_copy, bbox, outProbabillities, prob_threshold);
85+
imshow("Text detection", image_copy);
86+
image_copy = image.clone();
87+
88+
Ptr<text::OCRHolisticWordRecognizer> wordSpotter =
89+
text::OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt");
90+
91+
for(size_t i = 0; i < bbox.size(); i++)
92+
{
93+
if(outProbabillities[i] > prob_threshold)
94+
{
95+
Mat wordImg;
96+
cvtColor(image(bbox[i]), wordImg, COLOR_BGR2GRAY);
97+
string word;
98+
vector<float> confs;
99+
wordSpotter->run(wordImg, word, NULL, NULL, &confs);
100+
rectangle(image_copy, bbox[i], Scalar(0, 255, 255), 1, LINE_AA);
101+
putText(image_copy, word, bbox[i].tl(), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 1, LINE_AA);
102+
}
103+
}
104+
imshow("Text recognition", image_copy);
105+
cout << "Recognition finished. Press any key to exit.\n";
106+
waitKey();
107+
return 0;
108+
}
109+

0 commit comments

Comments
 (0)