46
46
47
47
#include < vector>
48
48
#include < string>
49
- #include < iostream>
50
- #include < sstream>
51
-
52
-
53
49
54
50
namespace cv
55
51
{
@@ -540,107 +536,24 @@ at each window location.
540
536
541
537
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN (const String& filename);
542
538
543
- // ! @}
544
-
545
-
546
-
547
- // Classifiers should provide diferent backends
548
- // For the moment only caffe is implemeted
549
- enum {
550
- OCR_HOLISTIC_BACKEND_NONE,
551
- OCR_HOLISTIC_BACKEND_CAFFE
552
- };
553
-
554
-
555
- /* * @brief Abstract class that implements the classifcation of text images.
556
- *
557
- * The interface is generic enough to describe any image classifier. And allows
558
- * to take advantage of compouting in batches. While word classifiers are the default
559
- * networks, any image classifers should work.
560
- *
561
- */
562
- class CV_EXPORTS_W TextImageClassifier
563
- {
564
- protected:
565
- Size inputSz_;
566
- int channelCount_;
567
- /* * @brief all image preprocessing is handled here including whitening etc.
568
- *
569
- * @param input the image to be preprocessed for the classifier. If the depth
570
- * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1]
571
- *
572
- * @param output reference to the image to be fed to the classifier, the preprocessor will
573
- * resize the image to the apropriate size and convert it to the apropriate depth\
574
- *
575
- * The method preprocess should never be used externally, it is up to classify and classifyBatch
576
- * methods to employ it.
577
- */
578
- virtual void preprocess (Mat& input,Mat& output)=0;
579
- public:
580
- virtual ~TextImageClassifier () {}
581
- /* * @brief produces a class confidence row-vector given an image
582
- */
583
- CV_WRAP virtual void classify (InputArray image, OutputArray classProbabilities) = 0;
584
- /* * @brief produces a matrix containing class confidence row-vectors given an collection of images
585
- */
586
- CV_WRAP virtual void classifyBatch (InputArrayOfArrays image, OutputArray classProbabilities) = 0;
587
- /* * @brief simple getter method returning the size of the oputput row-vector
588
- */
589
- CV_WRAP virtual int getOutputSize ()=0;
590
- /* * @brief simple getter method returning the size of the minibatches for this classifier.
591
- * If not applicabe this method should return 1
592
- */
593
- CV_WRAP virtual int getMinibatchSize ()=0;
594
- /* * @brief simple getter method returning a value describing the framework beeing employed to implement the classifier
595
- */
596
- CV_WRAP virtual int getBackend (){return OCR_HOLISTIC_BACKEND_NONE;}
597
- };
598
-
599
- class CV_EXPORTS_W DictNet:public TextImageClassifier
600
- {
601
- /* * @brief Class that uses a pretrained caffe model for word classification.
602
- *
603
- * This network is described in detail in:
604
- * Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
605
- * http://arxiv.org/abs/1412.1842
606
- */
607
- public:
608
- virtual ~DictNet () {};
609
-
610
- CV_WRAP virtual bool usingGpu ()=0;
611
- /* * @brief Constructs a DictNet object from a caffe pretrained model
612
- *
613
- * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
614
- *
615
- * @param weightsFilename is the path to the pretrained weights of the model in binary fdorm. This file can be
616
- * very large, up to 2GB.
617
- *
618
- * @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter
619
- * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
620
- *
621
- * @param useGpu boolean flag setting GPU or CPU computation
622
- *
623
- * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
624
- * the only option
625
- */
626
- CV_WRAP static Ptr<DictNet> create (String archFilename,String weightsFilename,int minibatchSz=100 ,bool useGpu=0 ,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
627
- };
628
-
629
-
630
539
631
540
/** @brief OCRHolisticWordRecognizer class provides the functionality of segmented word spotting.
632
- * Given a predefined vocabulary , a TextImageClassifier is employed to select the most probable
541
+ * Given a predefined vocabulary, a DictNet is employed to select the most probable
633
542
* word given an input image.
634
543
*
635
- * This class implements the logic of providing transcriptions given a vocabulary and and an image
636
- * classifer.
544
+ * DictNet is described in detail in:
545
+ * Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
546
+ * http://arxiv.org/abs/1412.1842
637
547
*/
638
- class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR
548
+ class CV_EXPORTS OCRHolisticWordRecognizer : public BaseOCR
639
549
{
640
550
public:
641
- virtual void run (Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL ,
642
- std::vector<std::string>* component_texts=NULL , std::vector<float >* component_confidences=NULL ,
643
- int component_level=OCR_LEVEL_WORD)=0;
551
+ virtual void run (Mat& image,
552
+ std::string& output_text,
553
+ std::vector<Rect>* component_rects = NULL ,
554
+ std::vector<std::string>* component_texts = NULL ,
555
+ std::vector<float >* component_confidences = NULL ,
556
+ int component_level = OCR_LEVEL_WORD) = 0;
644
557
645
558
/** @brief Recognize text using a segmentation-based word-spotting/classifier CNN.
646
559
@@ -665,68 +578,24 @@ class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR
665
578
666
579
@param component_level must be OCR_LEVEL_WORD.
667
580
*/
668
-
669
- virtual void run (Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL ,
670
- std::vector<std::string>* component_texts=NULL , std::vector<float >* component_confidences=NULL ,
671
- int component_level=OCR_LEVEL_WORD)=0;
672
-
673
-
674
- /* *
675
- @brief Method that provides a quick and simple interface to a single word image classifcation
676
-
677
- @param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size assumed to contain a single word
678
-
679
- @param transcription an opencv string that will store the detected word transcription
680
-
681
- @param confidence a double that will be updated with the confidence the classifier has for the selected word
682
- */
683
- CV_WRAP virtual void recogniseImage (InputArray inputImage,CV_OUT String& transcription,CV_OUT double & confidence)=0;
684
-
685
- /* *
686
- @brief Method that provides a quick and simple interface to a multiple word image classifcation taking advantage
687
- the classifiers parallel capabilities.
688
-
689
- @param inputImageList an list of images expected to be a CV_U8C1 or CV_U8C3 each image can be of any size and is assumed
690
- to contain a single word.
691
-
692
- @param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each
693
- input image
694
-
695
- @param confidences a vector of double that will be updated with the confidence the classifier has for each of the
696
- selected words.
697
- */
698
- CV_WRAP virtual void recogniseImageBatch (InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptions,CV_OUT std::vector<double >& confidences)=0;
699
-
700
-
701
- /* *
702
- @brief simple getted for the vocabulary employed
703
- */
704
- CV_WRAP virtual const std::vector<String>& getVocabulary ()=0;
705
-
581
+ virtual void run (Mat& image,
582
+ Mat& mask,
583
+ std::string& output_text,
584
+ std::vector<Rect>* component_rects = NULL ,
585
+ std::vector<std::string>* component_texts = NULL ,
586
+ std::vector<float >* component_confidences = NULL ,
587
+ int component_level = OCR_LEVEL_WORD) = 0;
706
588
707
589
/** @brief Creates an instance of the OCRHolisticWordRecognizer class.
708
-
709
- @param classifierPtr an instance of TextImageClassifier, normaly a DictNet instance
710
- @param vocabullaryFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
711
- in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
712
- of the classifier.
713
590
*/
714
- CV_WRAP static Ptr<OCRHolisticWordRecognizer> create (Ptr<TextImageClassifier> classifierPtr,String vocabullaryFilename);
715
- /* * @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DictNet classifier.
716
-
717
- @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
718
- @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
719
- @param vocabullaryFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
720
- in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
721
- of the classifier.
722
- */
723
- CV_WRAP static Ptr<OCRHolisticWordRecognizer> create (String modelArchFilename, String modelWeightsFilename, String vocabullaryFilename);
724
-
591
+ static Ptr<OCRHolisticWordRecognizer> create (const std::string &archFilename,
592
+ const std::string &weightsFilename,
593
+ const std::string &wordsFilename);
725
594
};
726
595
596
+ //! @}
727
597
728
- }
729
- }
598
+ }} // cv::text::
730
599
731
600
732
601
#endif // _OPENCV_TEXT_OCR_HPP_
0 commit comments