Skip to content

Commit 68736a2

Browse files
committed
Merge pull request #1384 from mshabunin:pr723
2 parents 8fbb0ec + 3aa8888 commit 68736a2

File tree

5 files changed

+225
-4
lines changed

5 files changed

+225
-4
lines changed

modules/text/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
set(the_description "Text Detection and Recognition")
2-
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python java)
2+
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java)
33

44
if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
55
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

modules/text/include/opencv2/text/ocr.hpp

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -536,8 +536,66 @@ at each window location.
536536

537537
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
538538

539+
540+
/** @brief OCRHolisticWordRecognizer class provides the functionality of segmented wordspotting.
541+
* Given a predefined vocabulary, a DictNet is employed to select the most probable
542+
* word given an input image.
543+
*
544+
* DictNet is described in detail in:
545+
* Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
546+
* http://arxiv.org/abs/1412.1842
547+
*/
548+
class CV_EXPORTS OCRHolisticWordRecognizer : public BaseOCR
549+
{
550+
public:
551+
virtual void run(Mat& image,
552+
std::string& output_text,
553+
std::vector<Rect>* component_rects = NULL,
554+
std::vector<std::string>* component_texts = NULL,
555+
std::vector<float>* component_confidences = NULL,
556+
int component_level = OCR_LEVEL_WORD) = 0;
557+
558+
/** @brief Recognize text using a segmentation based word-spotting/classifier cnn.
559+
560+
Takes image on input and returns recognized text in the output_text parameter. Optionally
561+
provides also the Rects for individual text elements found (e.g. words), and the list of those
562+
text elements with their confidence values.
563+
564+
@param image Input image CV_8UC1 or CV_8UC3
565+
566+
@param mask is totally ignored and is only available for compatibility reasons.
567+
568+
@param output_text Output text of the word spotting; always a word that exists in the dictionary.
569+
570+
@param component_rects Not applicable for word spotting; can be NULL. If not NULL, a single element will
571+
be put in the vector.
572+
573+
@param component_texts Not applicable for word spotting; can be NULL. If not NULL, a single element will
574+
be put in the vector.
575+
576+
@param component_confidences Not applicable for word spotting; can be NULL. If not NULL, a single element will
577+
be put in the vector.
578+
579+
@param component_level must be OCR_LEVEL_WORD.
580+
*/
581+
virtual void run(Mat& image,
582+
Mat& mask,
583+
std::string& output_text,
584+
std::vector<Rect>* component_rects = NULL,
585+
std::vector<std::string>* component_texts = NULL,
586+
std::vector<float>* component_confidences = NULL,
587+
int component_level = OCR_LEVEL_WORD) = 0;
588+
589+
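/** @brief Creates an instance of the OCRHolisticWordRecognizer class.

@param archFilename Path to the Caffe network architecture file (e.g. dictnet_vgg_deploy.prototxt).

@param weightsFilename Path to the trained Caffe weights file (e.g. dictnet_vgg.caffemodel).

@param wordsFilename Path to a text file holding the vocabulary, one word per line. The number of
lines must equal the number of classes produced by the network.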
590+
*/
591+
static Ptr<OCRHolisticWordRecognizer> create(const std::string &archFilename,
592+
const std::string &weightsFilename,
593+
const std::string &wordsFilename);
594+
};
595+
539596
//! @}
540597

541-
}
542-
}
598+
}} // cv::text::
599+
600+
543601
#endif // _OPENCV_TEXT_OCR_HPP_

modules/text/samples/dictnet_demo.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* dictnet_demo.cpp
3+
*
4+
* Demonstrates simple use of the holistic word classifier in C++
5+
*
6+
* Created on: June 26, 2016
7+
* Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com>
8+
*/
9+
10+
#include "opencv2/text.hpp"
11+
#include "opencv2/highgui.hpp"
12+
#include "opencv2/imgproc.hpp"
13+
14+
#include <sstream>
15+
#include <iostream>
16+
17+
using namespace std;
18+
using namespace cv;
19+
using namespace cv::text;
20+
21+
/**
 * Prints the demo description, the command-line usage and the shell commands
 * for downloading the Caffe model files to standard output.
 */
inline void printHelp()
{
    // One entry per output line; empty entries produce the blank separator lines.
    static const char* const helpLines[] = {
        " Demo of wordspotting CNN for text recognition.",
        " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015",
        "",
        " Usage: program <input_image>",
        " Caffe Model files (dictnet_vgg.caffemodel, dictnet_vgg_deploy.prototxt, dictnet_vgg_labels.txt)",
        " must be in the current directory.",
        "",
        " Obtaining Caffe Model files in linux shell:",
        " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel",
        " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt",
        " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt",
        ""
    };
    const std::size_t lineCount = sizeof(helpLines) / sizeof(helpLines[0]);

    for (std::size_t i = 0; i < lineCount; ++i)
        std::cout << helpLines[i] << std::endl;
}
35+
36+
int main(int argc, const char * argv[])
37+
{
38+
if (argc != 2)
39+
{
40+
printHelp();
41+
exit(1);
42+
}
43+
44+
Mat image = imread(argv[1], IMREAD_GRAYSCALE);
45+
46+
cout << "Read image (" << argv[1] << "): " << image.size << ", channels: " << image.channels() << ", depth: " << image.depth() << endl;
47+
48+
if (image.empty())
49+
{
50+
printHelp();
51+
exit(1);
52+
}
53+
54+
Ptr<OCRHolisticWordRecognizer> wordSpotter = OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt");
55+
56+
std::string word;
57+
vector<float> confs;
58+
wordSpotter->run(image, word, 0, 0, &confs);
59+
60+
cout << "Detected word: '" << word << "', confidence: " << confs[0] << endl;
61+
}

modules/text/src/ocr_holistic.cpp

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#include "precomp.hpp"
2+
#include "opencv2/imgproc.hpp"
3+
#include "opencv2/core.hpp"
4+
#include "opencv2/dnn.hpp"
5+
6+
#include <fstream>
7+
8+
using namespace std;
9+
10+
namespace cv { namespace text {
11+
12+
class OCRHolisticWordRecognizerImpl : public OCRHolisticWordRecognizer
13+
{
14+
private:
15+
dnn::Net net;
16+
vector<string> words;
17+
18+
public:
19+
OCRHolisticWordRecognizerImpl(const string &archFilename, const string &weightsFilename, const string &wordsFilename)
20+
{
21+
net = dnn::readNetFromCaffe(archFilename, weightsFilename);
22+
std::ifstream in(wordsFilename.c_str());
23+
if (!in)
24+
{
25+
CV_Error(Error::StsError, "Could not read Labels from file");
26+
}
27+
std::string line;
28+
while (std::getline(in, line))
29+
words.push_back(line);
30+
CV_Assert(getClassCount() == words.size());
31+
}
32+
33+
void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, int component_level=0)
34+
{
35+
CV_Assert(component_level==OCR_LEVEL_WORD); //Componnents not applicable for word spotting
36+
double confidence;
37+
output_text = classify(image, confidence);
38+
if(component_rects!=NULL){
39+
component_rects->resize(1);
40+
(*component_rects)[0]=Rect(0,0,image.size().width,image.size().height);
41+
}
42+
if(component_texts!=NULL){
43+
component_texts->resize(1);
44+
(*component_texts)[0] = output_text;
45+
}
46+
if(component_confidences!=NULL){
47+
component_confidences->resize(1);
48+
(*component_confidences)[0] = float(confidence);
49+
}
50+
}
51+
52+
void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, int component_level=0)
53+
{
54+
//Mask is ignored because the CNN operates on a full image
55+
CV_Assert(mask.cols == image.cols && mask.rows == image.rows);
56+
this->run(image, output_text, component_rects, component_texts, component_confidences, component_level);
57+
}
58+
59+
protected:
60+
Size getPerceptiveField() const
61+
{
62+
return Size(100, 32);
63+
}
64+
65+
size_t getClassCount()
66+
{
67+
int id = net.getLayerId("prob");
68+
dnn::MatShape inputShape;
69+
inputShape.push_back(1);
70+
inputShape.push_back(1);
71+
inputShape.push_back(getPerceptiveField().height);
72+
inputShape.push_back(getPerceptiveField().width);
73+
vector<dnn::MatShape> inShapes, outShapes;
74+
net.getLayerShapes(inputShape, id, inShapes, outShapes);
75+
CV_Assert(outShapes.size() == 1 && outShapes[0].size() == 4);
76+
CV_Assert(outShapes[0][0] == 1 && outShapes[0][2] == 1 && outShapes[0][3] == 1);
77+
return outShapes[0][1];
78+
}
79+
80+
string classify(InputArray image, double & conf)
81+
{
82+
CV_Assert(image.channels() == 1 && image.depth() == CV_8U);
83+
Mat resized;
84+
resize(image, resized, getPerceptiveField());
85+
Mat blob = dnn::blobFromImage(resized);
86+
net.setInput(blob, "data");
87+
Mat prob = net.forward("prob");
88+
CV_Assert(prob.dims == 4 && !prob.empty() && prob.size[1] == (int)getClassCount());
89+
int idx[4] = {0};
90+
minMaxIdx(prob, 0, &conf, 0, idx);
91+
CV_Assert(0 <= idx[1] && idx[1] < (int)words.size());
92+
return words[idx[1]];
93+
}
94+
95+
};
96+
97+
/** @brief Factory: builds the dnn-based recognizer and returns it through the
 * public OCRHolisticWordRecognizer interface.
 *
 * All three file names are forwarded unchanged to the implementation's constructor.
 */
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(const string &archFilename, const string &weightsFilename, const string &wordsFilename)
{
    Ptr<OCRHolisticWordRecognizerImpl> recognizer =
        makePtr<OCRHolisticWordRecognizerImpl>(archFilename, weightsFilename, wordsFilename);
    return recognizer;
}
101+
102+
}} // cv::text::

modules/text/text_config.hpp.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
// HAVE OCR Tesseract
55
#cmakedefine HAVE_TESSERACT
66

7-
#endif
7+
#endif

0 commit comments

Comments
 (0)