Skip to content

Commit a2cab07

Browse files
committed
DNN backend initial commit
1 parent 111b3be commit a2cab07

File tree

6 files changed

+511
-10
lines changed

6 files changed

+511
-10
lines changed

modules/text/CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ else()
3131
message(STATUS "Glog: NO")
3232
endif()
3333

34-
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d WRAP python)
34+
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python)
3535
#ocv_define_module(text ${TEXT_DEPS} WRAP python)
3636

3737
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
@@ -67,3 +67,11 @@ if()
6767
else()
6868
message(STATUS "TEXT CAFFE CONFLICT")
6969
endif()
70+
71+
# Optional opencv_dnn support for the text module (opencv_dnn is listed as OPTIONAL in
# ocv_define_module above). HAVE_opencv_dnn is presumably set by the OpenCV build system
# when the dnn module is enabled -- TODO confirm.
if(HAVE_opencv_dnn)
72+
message(STATUS "dnn module found")
73+
# Pass HAVE_DNN to the compiler so the C++ sources can guard their dnn-specific code.
add_definitions(-DHAVE_DNN)
74+
# Also record the result as a CMake variable.
set(HAVE_DNN 1)
75+
else()
76+
message(STATUS "dnn module not found")
77+
endif()

modules/text/include/opencv2/text/ocr.hpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -658,9 +658,12 @@ CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClas
658658

659659
//Classifiers should provide different backends
660660
//For the moment the caffe and opencv_dnn backends are implemented
661+
661662
enum{
662-
OCR_HOLISTIC_BACKEND_NONE,
663-
OCR_HOLISTIC_BACKEND_CAFFE
663+
OCR_HOLISTIC_BACKEND_NONE, //No back end
664+
OCR_HOLISTIC_BACKEND_DNN, // dnn backend opencv_dnn
665+
OCR_HOLISTIC_BACKEND_CAFFE, // caffe based backend
666+
OCR_HOLISTIC_BACKEND_DEFAULT // to store default value based on environment
664667
};
665668

666669
class TextImageClassifier;
@@ -831,7 +834,7 @@ class CV_EXPORTS_W DeepCNN:public TextImageClassifier
831834
* @param backEnd integer parameter selecting the computation framework: OCR_HOLISTIC_BACKEND_CAFFE,
832835
* OCR_HOLISTIC_BACKEND_DNN, or OCR_HOLISTIC_BACKEND_DEFAULT (Caffe when compiled in, otherwise opencv_dnn)
833836
*/
834-
CV_WRAP static Ptr<DeepCNN> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
837+
CV_WRAP static Ptr<DeepCNN> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
835838

836839
/** @brief Constructs a DeepCNN intended to be used for word spotting.
837840
*
@@ -853,7 +856,7 @@ class CV_EXPORTS_W DeepCNN:public TextImageClassifier
853856
* @param backEnd integer parameter selecting the computation framework: OCR_HOLISTIC_BACKEND_CAFFE,
854857
* OCR_HOLISTIC_BACKEND_DNN, or OCR_HOLISTIC_BACKEND_DEFAULT (Caffe when compiled in, otherwise opencv_dnn)
855858
*/
856-
CV_WRAP static Ptr<DeepCNN> createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
859+
CV_WRAP static Ptr<DeepCNN> createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
857860

858861
};
859862

modules/text/include/opencv2/text/textDetector.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector
160160
* @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
161161
* the only option
162162
*/
163-
CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
163+
CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
164164

165165
/** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
166166
*
@@ -177,7 +177,7 @@ class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector
177177
* @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
178178
* the only option
179179
*/
180-
CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
180+
CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
181181
friend class ImagePreprocessor;
182182

183183
};

modules/text/samples/textbox_demo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,10 @@ void textbox_draw(cv::Mat &src, std::vector<cv::Rect> &groups,std::vector<float
5959
int main(int argc, const char * argv[]){
6060
if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){
6161
std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
62-
exit(1);
62+
//exit(1);
6363
}
6464
//set to true if you have a GPU with more than 3GB
65+
if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
6566
cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
6667

6768
if (argc < 3){

modules/text/src/ocr_holistic.cpp

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@
2121
#include "caffe/caffe.hpp"
2222
#endif
2323

24+
#ifdef HAVE_DNN
25+
#include "opencv2/dnn.hpp"
26+
#endif
27+
28+
using namespace cv;
29+
using namespace cv::dnn;
30+
using namespace std;
2431
namespace cv { namespace text {
2532

2633
//Maybe OpenCV has a routine better suited
@@ -47,6 +54,7 @@ void ImagePreprocessor::set_mean(Mat mean){
4754
}
4855

4956

57+
5058
class ResizerPreprocessor: public ImagePreprocessor{
5159
protected:
5260
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
@@ -579,6 +587,183 @@ class DeepCNNCaffeImpl: public DeepCNN{
579587
}
580588
};
581589

590+
class DeepCNNOpenCvDNNImpl: public DeepCNN{
591+
protected:
592+
593+
void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat)
594+
{
595+
//Classifies a list of images containing at most minibatchSz_ images
596+
CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
597+
CV_Assert(outputMat.isContinuous());
598+
599+
#ifdef HAVE_DNN
600+
601+
std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
602+
603+
Mat preprocessed;
604+
// preprocesses each image in the inputImageList and push to preprocessedImList
605+
for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
606+
{
607+
this->preprocess(inputImageList[imgNum],preprocessed);
608+
preProcessedImList.push_back(preprocessed);
609+
}
610+
// set input data blob in dnn::net
611+
net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data");
612+
613+
float*outputMatData=(float*)(outputMat.data);
614+
//Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
615+
Mat outputNet = this->net_->forward();
616+
outputNet = outputNet.reshape(1, 1);
617+
618+
float*outputNetData=(float*)(outputNet.data);
619+
620+
memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size());
621+
622+
#endif
623+
}
624+
625+
#ifdef HAVE_DNN
626+
Ptr<Net> net_;
627+
#endif
628+
//Size inputGeometry_;
629+
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
630+
int outputSize_;
631+
public:
632+
DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
633+
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
634+
channelCount_=dn.channelCount_;
635+
inputGeometry_=dn.inputGeometry_;
636+
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
637+
#ifdef HAVE_DNN
638+
this->net_=dn.net_;
639+
#endif
640+
}
641+
DeepCNNOpenCvDNNImpl& operator=(const DeepCNNOpenCvDNNImpl &dn)
642+
{
643+
#ifdef HAVE_DNN
644+
this->net_=dn.net_;
645+
#endif
646+
this->setPreprocessor(dn.preprocessor_);
647+
this->inputGeometry_=dn.inputGeometry_;
648+
this->channelCount_=dn.channelCount_;
649+
this->minibatchSz_=dn.minibatchSz_;
650+
this->outputSize_=dn.outputSize_;
651+
this->preprocessor_=dn.preprocessor_;
652+
this->outputGeometry_=dn.outputGeometry_;
653+
return *this;
654+
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
655+
}
656+
657+
DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz)
658+
:minibatchSz_(maxMinibatchSz)
659+
{
660+
661+
CV_Assert(this->minibatchSz_>0);
662+
CV_Assert(fileExists(modelArchFilename));
663+
CV_Assert(fileExists(modelWeightsFilename));
664+
CV_Assert(!preprocessor.empty());
665+
this->setPreprocessor(preprocessor);
666+
#ifdef HAVE_DNN
667+
668+
this->net_ = makePtr<Net>(readNetFromCaffe(modelArchFilename,modelWeightsFilename));
669+
670+
671+
672+
if (this->net_.empty())
673+
{
674+
std::cerr << "Can't load network by using the following files: " << std::endl;
675+
std::cerr << "prototxt: " << modelArchFilename << std::endl;
676+
std::cerr << "caffemodel: " << modelWeightsFilename << std::endl;
677+
//std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
678+
//std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
679+
exit(-1);
680+
}
681+
// find a wa to check the followings in cv::dnn ???
682+
// CV_Assert(net_->num_inputs()==1);
683+
// CV_Assert(net_->num_outputs()==1);
684+
// CV_Assert(this->net_->input_blobs()[0]->channels()==1
685+
// ||this->net_->input_blobs()[0]->channels()==3);
686+
// this->channelCount_=this->net_->input_blobs()[0]->channels();
687+
688+
689+
690+
//this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
691+
692+
//caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
693+
//inputLayerId = net_->getLayerId('data');
694+
695+
// inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape,
696+
// inputLayerId,
697+
// std::vector<MatShape>* inLayerShapes,
698+
// std::vector<MatShape>* outLayerShapes) const;
699+
// should not be hard coded ideally
700+
701+
this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height());
702+
this->channelCount_ = 1;//inputLayer->channels();
703+
704+
//inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
705+
//net_->Reshape();
706+
this->outputSize_=88172 ;//net_->output_blobs()[0]->channels();
707+
this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
708+
709+
710+
711+
712+
713+
714+
#else
715+
CV_Error(Error::StsError,"DNN module not available during compilation!");
716+
#endif
717+
}
718+
719+
void classify(InputArray image, OutputArray classProbabilities)
720+
{
721+
std::vector<Mat> inputImageList;
722+
inputImageList.push_back(image.getMat());
723+
classifyBatch(inputImageList,classProbabilities);
724+
}
725+
726+
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
727+
{
728+
std::vector<Mat> allImageVector;
729+
inputImageList.getMatVector(allImageVector);
730+
size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
731+
732+
size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
733+
classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
734+
Mat outputMat = classProbabilities.getMat();
735+
printf("ekhane");
736+
for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
737+
{
738+
size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
739+
std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum);
740+
std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd);
741+
std::vector<Mat> minibatchInput(from,to);
742+
classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd)));
743+
744+
}
745+
746+
}
747+
748+
int getOutputSize()
749+
{
750+
return this->outputSize_;
751+
}
752+
Size getOutputGeometry()
753+
{
754+
return this->outputGeometry_;
755+
}
756+
757+
int getMinibatchSize()
758+
{
759+
return this->minibatchSz_;
760+
}
761+
762+
int getBackend()
763+
{
764+
return OCR_HOLISTIC_BACKEND_DNN;
765+
}
766+
};
582767

583768
Ptr<DeepCNN> DeepCNN::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
584769
{
@@ -587,9 +772,25 @@ Ptr<DeepCNN> DeepCNN::create(String archFilename,String weightsFilename,Ptr<Imag
587772
preprocessor=ImagePreprocessor::createResizer();
588773
}
589774
switch(backEnd){
775+
case OCR_HOLISTIC_BACKEND_DEFAULT:
776+
777+
#ifdef HAVE_CAFFE
778+
return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
779+
780+
#elif defined(HAVE_DNN)
781+
return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
782+
#else
783+
CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
784+
return Ptr<DeepCNN>();
785+
#endif
786+
break;
787+
590788
case OCR_HOLISTIC_BACKEND_CAFFE:
591789
return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
592790
break;
791+
case OCR_HOLISTIC_BACKEND_DNN:
792+
return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
793+
break;
593794
case OCR_HOLISTIC_BACKEND_NONE:
594795
default:
595796
CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
@@ -603,9 +804,25 @@ Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,i
603804
{
604805
Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageStandarizer(113);
605806
switch(backEnd){
807+
case OCR_HOLISTIC_BACKEND_DEFAULT:
808+
809+
#ifdef HAVE_CAFFE
810+
return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
811+
812+
#elif defined(HAVE_DNN)
813+
return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100));
814+
#else
815+
CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
816+
return Ptr<DeepCNN>();
817+
#endif
818+
break;
819+
606820
case OCR_HOLISTIC_BACKEND_CAFFE:
607821
return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
608822
break;
823+
case OCR_HOLISTIC_BACKEND_DNN:
824+
return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100));
825+
break;
609826
case OCR_HOLISTIC_BACKEND_NONE:
610827
default:
611828
CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
@@ -639,6 +856,27 @@ bool getCaffeAvailable()
639856
{
640857
return true;
641858
}
859+
#elif defined(HAVE_DNN)
860+
861+
bool getCaffeGpuMode()
862+
{
863+
CV_Error(Error::StsError,"Caffe not available during compilation!");
864+
return 0;
865+
}
866+
867+
void setCaffeGpuMode(bool useGpu)
868+
{
869+
CV_Error(Error::StsError,"Caffe not available during compilation!");
870+
CV_Assert(useGpu==1);//Compilation directives force
871+
}
872+
873+
/** Reports whether the Caffe backend was compiled in; in this branch it was not. */
bool getCaffeAvailable(){
    return false;
}
876+
/** Reports whether the opencv_dnn backend was compiled in. This definition lives in the
 *  `#elif defined(HAVE_DNN)` branch, so it always answers true.
 */
bool getDNNAvailable(){
877+
return true;
878+
}
879+
642880

643881
#else
644882

0 commit comments

Comments
 (0)