Skip to content

Commit 3aa8888

Browse files
committed
Reworked HolisticWordspotter to work with dnn module
1 parent bad02f3 commit 3aa8888

File tree

9 files changed

+122
-657
lines changed

9 files changed

+122
-657
lines changed

modules/text/CMakeLists.txt

Lines changed: 1 addition & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
set(the_description "Text Detection and Recognition")
2-
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python java)
2+
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java)
33

44
if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
55
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
@@ -22,62 +22,3 @@ ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
2222
ocv_add_testdata(samples/ contrib/text
2323
FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
2424
)
25-
26-
27-
#Principal source from which adaptation came is the cnn_3dobj module
28-
find_package(Caffe)
29-
30-
if(Caffe_FOUND)
31-
message(STATUS "Caffe: YES")
32-
set(HAVE_CAFFE 1)
33-
else()
34-
message(STATUS "Caffe: NO")
35-
endif()
36-
37-
find_package(Protobuf)
38-
if(Protobuf_FOUND)
39-
message(STATUS "Protobuf: YES")
40-
set(HAVE_PROTOBUF 1)
41-
else()
42-
message(STATUS "Protobuf: NO")
43-
endif()
44-
45-
find_package(Glog)
46-
if(Glog_FOUND)
47-
message(STATUS "Glog: YES")
48-
set(HAVE_GLOG 1)
49-
else()
50-
message(STATUS "Glog: NO")
51-
endif()
52-
53-
if(HAVE_CAFFE)
54-
message(STATUS "HAVE CAFFE!!!")
55-
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
56-
${CMAKE_CURRENT_SOURCE_DIR}/include/opencv2/text_config.hpp @ONLY)
57-
58-
59-
include_directories(${CMAKE_CURRENT_BINARY_DIR})
60-
61-
if(${Caffe_FOUND})
62-
63-
include_directories(${Caffe_INCLUDE_DIR})
64-
#taken from caffe's cmake
65-
find_package(HDF5 COMPONENTS HL REQUIRED)
66-
include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
67-
list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})
68-
find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
69-
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
70-
include_directories(SYSTEM /usr/local/cuda-7.5/targets/x86_64-linux/include/)
71-
list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
72-
73-
endif()
74-
75-
76-
if(${Caffe_FOUND})
77-
#taken from caffe's cmake
78-
target_link_libraries(opencv_text ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
79-
endif()
80-
endif()
81-
82-
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
83-
${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)

modules/text/FindCaffe.cmake

Lines changed: 0 additions & 14 deletions
This file was deleted.

modules/text/FindGlog.cmake

Lines changed: 0 additions & 10 deletions
This file was deleted.

modules/text/FindProtobuf.cmake

Lines changed: 0 additions & 10 deletions
This file was deleted.

modules/text/include/opencv2/text/ocr.hpp

Lines changed: 23 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,6 @@
4646

4747
#include <vector>
4848
#include <string>
49-
#include <iostream>
50-
#include <sstream>
51-
52-
5349

5450
namespace cv
5551
{
@@ -540,107 +536,24 @@ at each window location.
540536

541537
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
542538

543-
//! @}
544-
545-
546-
547-
//Classifiers should provide diferent backends
548-
//For the moment only caffe is implemeted
549-
enum{
550-
OCR_HOLISTIC_BACKEND_NONE,
551-
OCR_HOLISTIC_BACKEND_CAFFE
552-
};
553-
554-
555-
/** @brief Abstract class that implements the classifcation of text images.
556-
*
557-
* The interface is generic enough to describe any image classifier. And allows
558-
* to take advantage of compouting in batches. While word classifiers are the default
559-
* networks, any image classifers should work.
560-
*
561-
*/
562-
class CV_EXPORTS_W TextImageClassifier
563-
{
564-
protected:
565-
Size inputSz_;
566-
int channelCount_;
567-
/** @brief all image preprocessing is handled here including whitening etc.
568-
*
569-
* @param input the image to be preprocessed for the classifier. If the depth
570-
* is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1]
571-
*
572-
* @param output reference to the image to be fed to the classifier, the preprocessor will
573-
* resize the image to the apropriate size and convert it to the apropriate depth\
574-
*
575-
* The method preprocess should never be used externally, it is up to classify and classifyBatch
576-
* methods to employ it.
577-
*/
578-
virtual void preprocess(Mat& input,Mat& output)=0;
579-
public:
580-
virtual ~TextImageClassifier() {}
581-
/** @brief produces a class confidence row-vector given an image
582-
*/
583-
CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
584-
/** @brief produces a matrix containing class confidence row-vectors given an collection of images
585-
*/
586-
CV_WRAP virtual void classifyBatch(InputArrayOfArrays image, OutputArray classProbabilities) = 0;
587-
/** @brief simple getter method returning the size of the oputput row-vector
588-
*/
589-
CV_WRAP virtual int getOutputSize()=0;
590-
/** @brief simple getter method returning the size of the minibatches for this classifier.
591-
* If not applicabe this method should return 1
592-
*/
593-
CV_WRAP virtual int getMinibatchSize()=0;
594-
/** @brief simple getter method returning a value describing the framework beeing employed to implement the classifier
595-
*/
596-
CV_WRAP virtual int getBackend(){return OCR_HOLISTIC_BACKEND_NONE;}
597-
};
598-
599-
class CV_EXPORTS_W DictNet:public TextImageClassifier
600-
{
601-
/** @brief Class that uses a pretrained caffe model for word classification.
602-
*
603-
* This network is described in detail in:
604-
* Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
605-
* http://arxiv.org/abs/1412.1842
606-
*/
607-
public:
608-
virtual ~DictNet() {};
609-
610-
CV_WRAP virtual bool usingGpu()=0;
611-
/** @brief Constructs a DictNet object from a caffe pretrained model
612-
*
613-
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
614-
*
615-
* @param weightsFilename is the path to the pretrained weights of the model in binary fdorm. This file can be
616-
* very large, up to 2GB.
617-
*
618-
* @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter
619-
* has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
620-
*
621-
* @param useGpu boolean flag setting GPU or CPU computation
622-
*
623-
* @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
624-
* the only option
625-
*/
626-
CV_WRAP static Ptr<DictNet> create(String archFilename,String weightsFilename,int minibatchSz=100,bool useGpu=0,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
627-
};
628-
629-
630539

631540
/** @brief OCRHolisticWordRecognizer class provides the functionality of segmented wordspotting.
632-
* Given a predefined vocabulary , a TextImageClassifier is employed to select the most probable
541+
* Given a predefined vocabulary, a DictNet is employed to select the most probable
633542
* word given an input image.
634543
*
635-
* This class implements the logic of providing transcriptions given a vocabulary and and an image
636-
* classifer.
544+
* DictNet is described in detail in:
545+
* Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
546+
* http://arxiv.org/abs/1412.1842
637547
*/
638-
class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR
548+
class CV_EXPORTS OCRHolisticWordRecognizer : public BaseOCR
639549
{
640550
public:
641-
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
642-
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
643-
int component_level=OCR_LEVEL_WORD)=0;
551+
virtual void run(Mat& image,
552+
std::string& output_text,
553+
std::vector<Rect>* component_rects = NULL,
554+
std::vector<std::string>* component_texts = NULL,
555+
std::vector<float>* component_confidences = NULL,
556+
int component_level = OCR_LEVEL_WORD) = 0;
644557

645558
/** @brief Recognize text using a segmentation-based word-spotting/classifier CNN.
646559
@@ -665,68 +578,24 @@ class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR
665578
666579
@param component_level must be OCR_LEVEL_WORD.
667580
*/
668-
669-
virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
670-
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
671-
int component_level=OCR_LEVEL_WORD)=0;
672-
673-
674-
/**
675-
@brief Method that provides a quick and simple interface to a single word image classifcation
676-
677-
@param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size assumed to contain a single word
678-
679-
@param transcription an opencv string that will store the detected word transcription
680-
681-
@param confidence a double that will be updated with the confidence the classifier has for the selected word
682-
*/
683-
CV_WRAP virtual void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)=0;
684-
685-
/**
686-
@brief Method that provides a quick and simple interface to a multiple word image classifcation taking advantage
687-
the classifiers parallel capabilities.
688-
689-
@param inputImageList an list of images expected to be a CV_U8C1 or CV_U8C3 each image can be of any size and is assumed
690-
to contain a single word.
691-
692-
@param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each
693-
input image
694-
695-
@param confidences a vector of double that will be updated with the confidence the classifier has for each of the
696-
selected words.
697-
*/
698-
CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptions,CV_OUT std::vector<double>& confidences)=0;
699-
700-
701-
/**
702-
@brief simple getted for the vocabulary employed
703-
*/
704-
CV_WRAP virtual const std::vector<String>& getVocabulary()=0;
705-
581+
virtual void run(Mat& image,
582+
Mat& mask,
583+
std::string& output_text,
584+
std::vector<Rect>* component_rects = NULL,
585+
std::vector<std::string>* component_texts = NULL,
586+
std::vector<float>* component_confidences = NULL,
587+
int component_level = OCR_LEVEL_WORD) = 0;
706588

707589
/** @brief Creates an instance of the OCRHolisticWordRecognizer class.
708-
709-
@param classifierPtr an instance of TextImageClassifier, normaly a DictNet instance
710-
@param vocabullaryFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
711-
in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
712-
of the classifier.
713590
*/
714-
CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(Ptr<TextImageClassifier> classifierPtr,String vocabullaryFilename);
715-
/** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DictNet classifier.
716-
717-
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
718-
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
719-
@param vocabullaryFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
720-
in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
721-
of the classifier.
722-
*/
723-
CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(String modelArchFilename, String modelWeightsFilename, String vocabullaryFilename);
724-
591+
static Ptr<OCRHolisticWordRecognizer> create(const std::string &archFilename,
592+
const std::string &weightsFilename,
593+
const std::string &wordsFilename);
725594
};
726595

596+
//! @}
727597

728-
}
729-
}
598+
}} // cv::text::
730599

731600

732601
#endif // _OPENCV_TEXT_OCR_HPP_

0 commit comments

Comments
 (0)