|
| 1 | +#include <opencv2/text.hpp> |
| 2 | +#include <opencv2/highgui.hpp> |
| 3 | +#include <opencv2/imgproc.hpp> |
| 4 | + |
| 5 | +#include <iostream> |
| 6 | +#include <fstream> |
| 7 | + |
| 8 | +using namespace cv; |
| 9 | +using namespace std; |
| 10 | + |
| 11 | +namespace |
| 12 | +{ |
/**
 * Prints the demo description, the command-line usage and the model download
 * hints to standard output.
 *
 * @param progFname Program name inserted into the usage line (callers pass argv[0]).
 */
void printHelpStr(const std::string& progFname)
{
    // The usage line lists a single positional argument because main() reads
    // the input image path from argv[1] and takes no output file.
    std::cout << " Demo of text recognition CNN for text detection." << std::endl
              << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015" << std::endl << std::endl
              << " Usage: " << progFname << " <input_image>" << std::endl
              << " Caffe Model files (textbox.prototxt, TextBoxes_icdar13.caffemodel)" << std::endl
              << " must be in the current directory. See the documentation of text::TextDetectorCNN class to get download links." << std::endl
              << " Obtaining text recognition Caffe Model files in linux shell:" << std::endl
              << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel" << std::endl
              << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt" << std::endl
              << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt" << std::endl << std::endl;
}
| 25 | + |
/**
 * Reports whether the given path can be opened for reading.
 *
 * @param filename Path to probe.
 * @return true when an input file stream opened on the path is in a good
 *         state, false otherwise.
 */
bool fileExists (const std::string& filename)
{
    // The temporary stream is closed again as soon as the expression ends.
    return std::ifstream(filename.c_str()).good();
}
| 31 | + |
| 32 | +void textbox_draw(Mat src, vector<Rect>& groups, vector<float>& probs, float thres) |
| 33 | +{ |
| 34 | + for (size_t i = 0; i < groups.size(); i++) |
| 35 | + { |
| 36 | + if(probs[i] > thres) |
| 37 | + { |
| 38 | + if (src.type() == CV_8UC3) |
| 39 | + { |
| 40 | + rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA); |
| 41 | + String label = format("%.2f", probs[i]); |
| 42 | + cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n"; |
| 43 | + putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA); |
| 44 | + } |
| 45 | + else |
| 46 | + rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); |
| 47 | + } |
| 48 | + } |
| 49 | +} |
| 50 | + |
| 51 | +} |
| 52 | + |
| 53 | +int main(int argc, const char * argv[]) |
| 54 | +{ |
| 55 | + if (argc < 2) |
| 56 | + { |
| 57 | + printHelpStr(argv[0]); |
| 58 | + cout << "Insufiecient parameters. Aborting!" << endl; |
| 59 | + exit(1); |
| 60 | + } |
| 61 | + |
| 62 | + const string modelArch = "textbox.prototxt"; |
| 63 | + const string moddelWeights = "TextBoxes_icdar13.caffemodel"; |
| 64 | + |
| 65 | + if (!fileExists(modelArch) || !fileExists(moddelWeights)) |
| 66 | + { |
| 67 | + printHelpStr(argv[0]); |
| 68 | + cout << "Model files not found in the current directory. Aborting!" << endl; |
| 69 | + exit(1); |
| 70 | + } |
| 71 | + |
| 72 | + Mat image = imread(String(argv[1]), IMREAD_COLOR); |
| 73 | + |
| 74 | + cout << "Starting Text Box Demo" << endl; |
| 75 | + Ptr<text::TextDetectorCNN> textSpotter = |
| 76 | + text::TextDetectorCNN::create(modelArch, moddelWeights, false); |
| 77 | + |
| 78 | + vector<Rect> bbox; |
| 79 | + vector<float> outProbabillities; |
| 80 | + textSpotter->detect(image, bbox, outProbabillities); |
| 81 | + |
| 82 | + float prob_threshold = 0.6f; |
| 83 | + Mat image_copy = image.clone(); |
| 84 | + textbox_draw(image_copy, bbox, outProbabillities, prob_threshold); |
| 85 | + imshow("Text detection", image_copy); |
| 86 | + image_copy = image.clone(); |
| 87 | + |
| 88 | + Ptr<text::OCRHolisticWordRecognizer> wordSpotter = |
| 89 | + text::OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt"); |
| 90 | + |
| 91 | + for(size_t i = 0; i < bbox.size(); i++) |
| 92 | + { |
| 93 | + if(outProbabillities[i] > prob_threshold) |
| 94 | + { |
| 95 | + Mat wordImg; |
| 96 | + cvtColor(image(bbox[i]), wordImg, COLOR_BGR2GRAY); |
| 97 | + string word; |
| 98 | + vector<float> confs; |
| 99 | + wordSpotter->run(wordImg, word, NULL, NULL, &confs); |
| 100 | + rectangle(image_copy, bbox[i], Scalar(0, 255, 255), 1, LINE_AA); |
| 101 | + putText(image_copy, word, bbox[i].tl(), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 1, LINE_AA); |
| 102 | + } |
| 103 | + } |
| 104 | + imshow("Text recognition", image_copy); |
| 105 | + cout << "Recognition finished. Press any key to exit.\n"; |
| 106 | + waitKey(); |
| 107 | + return 0; |
| 108 | +} |
| 109 | + |
0 commit comments