Skip to content

Commit c697e41

Browse files
committed
added calculation of output size
1 parent a2cab07 commit c697e41

File tree

6 files changed

+107
-136
lines changed

6 files changed

+107
-136
lines changed

modules/text/include/opencv2/text/ocr.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,15 @@ class CV_EXPORTS_W DeepCNN:public TextImageClassifier
861861
};
862862

863863
namespace cnn_config{
864+
865+
/** @brief runtime backend information
866+
*
867+
* This function reports which backends were compiled into this module.
868+
*
869+
* @return a list of backends (caffe,opencv-dnn etc.)
870+
* */
871+
CV_EXPORTS_W std::vector<std::string> getAvailableBackends();
872+
864873
namespace caffe_backend{
865874

866875
/** @brief Prompts Caffe on the computation device being used
@@ -897,6 +906,21 @@ CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
897906
CV_EXPORTS_W bool getCaffeAvailable();
898907

899908
}//caffe
909+
namespace dnn_backend {
910+
911+
/** @brief Provides runtime information on whether DNN module was compiled in.
912+
*
913+
* The text module API is the same regardless of whether DNN module was available or not
914+
* during compilation. When methods that require a backend are invoked while no backend support
915+
* is compiled, exceptions are thrown. This method allows to test whether the
916+
* text module was built with dnn_backend during runtime.
917+
*
918+
* @return true if opencv_dnn support for the text module was provided during compilation,
919+
* false if opencv_dnn was unavailable.
920+
*/
921+
CV_EXPORTS_W bool getDNNAvailable();
922+
923+
}//dnn_backend
900924
}//cnn_config
901925

902926
/** @brief OCRHolisticWordRecognizer class provides the functionality of segmented wordspotting.

modules/text/include/opencv2/text/textDetector.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ namespace cv
5656
namespace text
5757
{
5858

59-
//! @addtogroup text_recognize
59+
//! @addtogroup text_detect
6060
//! @{
6161

6262

@@ -263,7 +263,7 @@ class CV_EXPORTS_W textDetector : public BaseDetector
263263

264264
};
265265

266-
266+
//! @}
267267
}//namespace text
268268
}//namespace cv
269269

modules/text/samples/textbox_demo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ int main(int argc, const char * argv[]){
6161
std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
6262
//exit(1);
6363
}
64+
std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
65+
std::cout << "The Following backends are available" << "\n";
66+
for (int i=0;i<backends.size();i++)
67+
std::cout << backends[i] << "\n";
68+
69+
// printf("%s",x);
6470
//set to true if you have a GPU with more than 3GB
6571
if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
6672
cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
@@ -112,7 +118,7 @@ int main(int argc, const char * argv[]){
112118
}
113119
// call dict net here for all detected parts
114120
cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
115-
"dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel");
121+
"dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
116122

117123
cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
118124
cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
@@ -130,7 +136,7 @@ int main(int argc, const char * argv[]){
130136
cv::Point tl_ = bbox.at(i).tl();
131137
cv::Point br_ = bbox.at(i).br();
132138

133-
out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<tl_.y<<","<<tl_.y<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
139+
out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
134140

135141
}
136142
out.close();

modules/text/src/ocr_holistic.cpp

Lines changed: 52 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ class StandarizerPreprocessor: public ImagePreprocessor{
122122
//void set_mean_(Mat M){}
123123

124124
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
125+
125126
//TODO put all the logic of channel and depth conversions in ImageProcessor class
126127
CV_Assert(outputChannels==1 || outputChannels==3);
127128
CV_Assert(input.channels()==1 || input.channels()==3);
@@ -433,6 +434,7 @@ class DeepCNNCaffeImpl: public DeepCNN{
433434
CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
434435
CV_Assert(outputMat.isContinuous());
435436

437+
436438
#ifdef HAVE_CAFFE
437439
net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
438440
net_->Reshape();
@@ -450,16 +452,19 @@ class DeepCNNCaffeImpl: public DeepCNN{
450452
input_channels.push_back(netInputWraped);
451453
//input_data += width * height;
452454
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
455+
453456
}
454457
this->preprocess(inputImageList[imgNum],preprocessed);
455458
split(preprocessed, input_channels);
456459

460+
457461
}
458462
this->net_->ForwardPrefilled();
459463
const float* outputNetData=net_->output_blobs()[0]->cpu_data();
460464
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
461465
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
462466

467+
463468
//outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
464469
float*outputMatData=(float*)(outputMat.data);
465470
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
@@ -470,9 +475,10 @@ class DeepCNNCaffeImpl: public DeepCNN{
470475
#ifdef HAVE_CAFFE
471476
Ptr<caffe::Net<float> > net_;
472477
#endif
473-
//Size inputGeometry_;
478+
//Size inputGeometry_;//=Size(100,32);
474479
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
475480
int outputSize_;
481+
//Size outputGeometry_;
476482
public:
477483
DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
478484
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@@ -608,7 +614,7 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{
608614
preProcessedImList.push_back(preprocessed);
609615
}
610616
// set input data blob in dnn::net
611-
net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data");
617+
net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data");
612618

613619
float*outputMatData=(float*)(outputMat.data);
614620
//Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
@@ -625,9 +631,16 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{
625631
#ifdef HAVE_DNN
626632
Ptr<Net> net_;
627633
#endif
628-
//Size inputGeometry_;
634+
// hard coding input image size. anything in DNN library to get that from prototxt??
635+
// Size inputGeometry_;//=Size(100,32);
629636
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
630637
int outputSize_;
638+
//Size outputGeometry_;//= Size(1,1);
639+
//int channelCount_;
640+
// int inputChannel_ ;//=1;
641+
const int _inputHeight =32;
642+
const int _inputWidth =100;
643+
const int _inputChannel =1;
631644
public:
632645
DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
633646
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@@ -678,33 +691,17 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{
678691
//std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
679692
exit(-1);
680693
}
681-
// find a wa to check the followings in cv::dnn ???
682-
// CV_Assert(net_->num_inputs()==1);
683-
// CV_Assert(net_->num_outputs()==1);
684-
// CV_Assert(this->net_->input_blobs()[0]->channels()==1
685-
// ||this->net_->input_blobs()[0]->channels()==3);
686-
// this->channelCount_=this->net_->input_blobs()[0]->channels();
687-
688-
689694

690-
//this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
691695

692-
//caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
693-
//inputLayerId = net_->getLayerId('data');
694-
695-
// inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape,
696-
// inputLayerId,
697-
// std::vector<MatShape>* inLayerShapes,
698-
// std::vector<MatShape>* outLayerShapes) const;
699-
// should not be hard coded ideally
700-
701-
this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height());
702-
this->channelCount_ = 1;//inputLayer->channels();
696+
this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height());
697+
this->channelCount_ = _inputChannel;//inputLayer->channels();
703698

704699
//inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
705-
//net_->Reshape();
706-
this->outputSize_=88172 ;//net_->output_blobs()[0]->channels();
707-
this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
700+
Ptr< Layer > outLayer= net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2]));
701+
//std::vector<Mat> blobs = outLayer->blobs;
702+
703+
this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
704+
//this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
708705

709706

710707

@@ -732,7 +729,7 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{
732729
size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
733730
classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
734731
Mat outputMat = classProbabilities.getMat();
735-
printf("ekhane");
732+
736733
for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
737734
{
738735
size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
@@ -832,6 +829,22 @@ Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,i
832829
}
833830

834831
namespace cnn_config{
832+
std::vector<std::string> getAvailableBackends()
833+
{
834+
std::vector<std::string> backends;
835+
836+
#ifdef HAVE_CAFFE
837+
backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // dnn backend opencv_dnn
838+
839+
#endif
840+
#ifdef HAVE_DNN
841+
backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn based backend"
842+
#endif
843+
return backends;
844+
845+
846+
}
847+
835848
namespace caffe_backend{
836849

837850
#ifdef HAVE_CAFFE
@@ -856,7 +869,7 @@ bool getCaffeAvailable()
856869
{
857870
return true;
858871
}
859-
#elif defined(HAVE_DNN)
872+
#else
860873

861874
bool getCaffeGpuMode()
862875
{
@@ -873,32 +886,23 @@ void setCaffeGpuMode(bool useGpu)
873886
bool getCaffeAvailable(){
874887
return 0;
875888
}
876-
bool getDNNAvailable(){
877-
return true;
878-
}
879889

890+
#endif
880891

881-
#else
892+
}//namespace caffe
893+
namespace dnn_backend{
894+
#ifdef HAVE_DNN
882895

883-
bool getCaffeGpuMode()
884-
{
885-
CV_Error(Error::StsError,"Caffe not available during compilation!");
886-
return 0;
887-
}
888896

889-
void setCaffeGpuMode(bool useGpu)
890-
{
891-
CV_Error(Error::StsError,"Caffe not available during compilation!");
892-
CV_Assert(useGpu==1);//Compilation directives force
897+
bool getDNNAvailable(){
898+
return true;
893899
}
894-
895-
bool getCaffeAvailable(){
900+
#else
901+
bool getDNNAvailable(){
896902
return 0;
897903
}
898-
899904
#endif
900-
901-
}//namespace caffe
905+
}//namspace dnn_backend
902906
}//namespace cnn_config
903907

904908
class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
@@ -931,6 +935,7 @@ class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
931935
getOutputs(buffer,nbOutputs,tmp);
932936
classNum=tmp[0].wordIdx;
933937
confidence=tmp[0].probabillity;
938+
934939
}
935940
};
936941
protected:
@@ -972,6 +977,7 @@ class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
972977
{
973978
Mat netOutput;
974979
this->classifier_->classifyBatch(inputImageList,netOutput);
980+
975981
for(int k=0;k<netOutput.rows;k++)
976982
{
977983
int classNum;

modules/text/src/text_detector.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
#include <vector>
1717

1818

19-
#ifdef HAVE_CAFFE
20-
#include "caffe/caffe.hpp"
21-
#endif
19+
//#ifdef HAVE_CAFFE
20+
//#include "caffe/caffe.hpp"
21+
//#endif
2222

2323
namespace cv { namespace text {
2424

0 commit comments

Comments
 (0)