added calculation of output size

sghoshcvc · sghoshcvc · commit c697e41b8d84 · 2017-08-28T19:25:58.000+02:00
diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp
@@ -861,6 +861,15 @@ class CV_EXPORTS_W DeepCNN:public TextImageClassifier
 };
 
 namespace cnn_config{
+
+/** @brief Reports which DeepCNN backends this module was built with.
+ *
+ * The list is decided at compile time: one entry is added for each backend that was enabled.
+ *
+ * @return a list of backend descriptions (Caffe, OpenCV DNN); empty if none was compiled in.
+ * */
+CV_EXPORTS_W std::vector<std::string> getAvailableBackends();
+
 namespace caffe_backend{
 
 /** @brief Prompts Caffe on the computation device beeing used
@@ -897,6 +906,21 @@ CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
 CV_EXPORTS_W bool getCaffeAvailable();
 
 }//caffe
+namespace dnn_backend {
+
+/** @brief Provides runtime information on whether the DNN module was compiled in.
+ *
+ * The text module API is the same regardless of whether the DNN module was available
+ * during compilation. When methods that require a backend are invoked while no backend support
+ * is compiled, exceptions are thrown. This method allows testing at runtime whether the
+ * text module was built with dnn_backend.
+ *
+ * @return true if opencv_dnn support for the text module was provided during compilation,
+ * false if opencv_dnn was unavailable.
+ */
+CV_EXPORTS_W bool getDNNAvailable();
+
+}//dnn_backend
 }//cnn_config
 
 /** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting.
diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp
@@ -56,7 +56,7 @@ namespace cv
 namespace text
 {
 
-//! @addtogroup text_recognize
+//! @addtogroup text_detect
 //! @{
 
 
@@ -263,7 +263,7 @@ class CV_EXPORTS_W textDetector : public BaseDetector
 
 };
 
-
+//! @}
 }//namespace text
 }//namespace cv
 
diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp
@@ -61,6 +61,12 @@ int main(int argc, const char * argv[]){
         std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
         //exit(1);
     }
+    std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
+    std::cout << "The Following backends are available" << "\n";
+    for (int i=0;i<backends.size();i++)
+       std::cout << backends[i] << "\n";
+
+   // printf("%s",x);
     //set to true if you have a GPU with more than 3GB
      if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
     cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
@@ -112,7 +118,7 @@ int main(int argc, const char * argv[]){
     }
     // call dict net here for all detected parts
     cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
-                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel");
+                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
 
     cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
             cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
@@ -130,7 +136,7 @@ int main(int argc, const char * argv[]){
         cv::Point tl_ = bbox.at(i).tl();
         cv::Point br_ = bbox.at(i).br();
 
-        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<tl_.y<<","<<tl_.y<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
+        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
 
     }
     out.close();
diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp
@@ -122,6 +122,7 @@ class StandarizerPreprocessor: public ImagePreprocessor{
     //void set_mean_(Mat M){}
 
     void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
+
         //TODO put all the logic of channel and depth conversions in ImageProcessor class
         CV_Assert(outputChannels==1 || outputChannels==3);
         CV_Assert(input.channels()==1 || input.channels()==3);
@@ -433,6 +434,7 @@ class DeepCNNCaffeImpl: public DeepCNN{
         CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
         CV_Assert(outputMat.isContinuous());
 
+
 #ifdef HAVE_CAFFE
         net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
         net_->Reshape();
@@ -450,16 +452,19 @@ class DeepCNNCaffeImpl: public DeepCNN{
                 input_channels.push_back(netInputWraped);
                 //input_data += width * height;
                 inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
+
             }
             this->preprocess(inputImageList[imgNum],preprocessed);
             split(preprocessed, input_channels);
 
+
         }
         this->net_->ForwardPrefilled();
         const float* outputNetData=net_->output_blobs()[0]->cpu_data();
         this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
         int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
 
+
         //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
         float*outputMatData=(float*)(outputMat.data);
         memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
@@ -470,9 +475,10 @@ class DeepCNNCaffeImpl: public DeepCNN{
 #ifdef HAVE_CAFFE
     Ptr<caffe::Net<float> > net_;
 #endif
-    //Size inputGeometry_;
+    //Size inputGeometry_;//=Size(100,32);
     int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
     int outputSize_;
+    //Size outputGeometry_;
 public:
     DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
         minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@@ -608,7 +614,7 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{
             preProcessedImList.push_back(preprocessed);
         }
         // set input data blob in dnn::net
-        net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data");
+        net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data");
 
         float*outputMatData=(float*)(outputMat.data);
        //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
@@ -625,9 +631,16 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{
 #ifdef HAVE_DNN
     Ptr<Net> net_;
 #endif
-    //Size inputGeometry_;
+    // Input image size is hard-coded. TODO: can the DNN library read it from the prototxt?
+   // Size inputGeometry_;//=Size(100,32);
     int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
     int outputSize_;
+    //Size outputGeometry_;//= Size(1,1);
+    //int channelCount_;
+   // int inputChannel_ ;//=1;
+    const int _inputHeight =32;
+    const int _inputWidth =100;
+    const int _inputChannel =1;
 public:
     DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
         minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@@ -678,33 +691,17 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{
             //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
             exit(-1);
         }
-// find a wa to check the followings in cv::dnn ???
-//        CV_Assert(net_->num_inputs()==1);
-//        CV_Assert(net_->num_outputs()==1);
-//        CV_Assert(this->net_->input_blobs()[0]->channels()==1
-//                ||this->net_->input_blobs()[0]->channels()==3);
-//        this->channelCount_=this->net_->input_blobs()[0]->channels();
-
-
 
-        //this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
 
-        //caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
-        //inputLayerId = net_->getLayerId('data');
-
-      //  inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape,
-       //                                     inputLayerId,
-      //                                      std::vector<MatShape>* inLayerShapes,
-      //  std::vector<MatShape>* outLayerShapes) const;
-        // should not be hard coded ideally
-
-        this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height());
-        this->channelCount_ = 1;//inputLayer->channels();
+        this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height());
+        this->channelCount_ = _inputChannel;//inputLayer->channels();
 
         //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-        //net_->Reshape();
-        this->outputSize_=88172 ;//net_->output_blobs()[0]->channels();
-        this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
+        Ptr< Layer > outLayer=	net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2]));
+        //std::vector<Mat> blobs = outLayer->blobs;
+
+        this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
+        //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
 
 
 
@@ -732,7 +729,7 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{
         size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
         classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
         Mat outputMat = classProbabilities.getMat();
-        printf("ekhane");
+
         for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
         {
             size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
@@ -832,6 +829,22 @@ Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,i
 }
 
 namespace cnn_config{
+std::vector<std::string> getAvailableBackends()
+{
+    std::vector<std::string> backends; // one entry per backend enabled at compile time; stays empty if none
+
+#ifdef HAVE_CAFFE
+    backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // Caffe based backend
+
+#endif
+#ifdef HAVE_DNN
+    backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn based backend
+#endif
+    return backends;
+
+
+}
+
 namespace caffe_backend{
 
 #ifdef HAVE_CAFFE
@@ -856,7 +869,7 @@ bool getCaffeAvailable()
 {
     return true;
 }
-#elif defined(HAVE_DNN)
+#else
 
 bool getCaffeGpuMode()
 {
@@ -873,32 +886,23 @@ void setCaffeGpuMode(bool useGpu)
 bool getCaffeAvailable(){
     return 0;
 }
-bool getDNNAvailable(){
-    return true;
-}
 
+#endif
 
-#else
+}//namespace caffe
+namespace dnn_backend{
+#ifdef  HAVE_DNN
 
-bool getCaffeGpuMode()
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    return 0;
-}
 
-void setCaffeGpuMode(bool useGpu)
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    CV_Assert(useGpu==1);//Compilation directives force
+bool getDNNAvailable(){
+    return true;
 }
-
-bool getCaffeAvailable(){
+#else
+bool getDNNAvailable(){
     return 0;
 }
-
 #endif
-
-}//namespace caffe
+}//namespace dnn_backend
 }//namespace cnn_config
 
 class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
@@ -931,6 +935,7 @@ class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
             getOutputs(buffer,nbOutputs,tmp);
             classNum=tmp[0].wordIdx;
             confidence=tmp[0].probabillity;
+
         }
     };
 protected:
@@ -972,6 +977,7 @@ class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
     {
         Mat netOutput;
         this->classifier_->classifyBatch(inputImageList,netOutput);
+
         for(int k=0;k<netOutput.rows;k++)
         {
             int classNum;
diff --git a/modules/text/src/text_detector.cpp b/modules/text/src/text_detector.cpp
@@ -16,9 +16,9 @@
 #include <vector>
 
 
-#ifdef HAVE_CAFFE
-#include "caffe/caffe.hpp"
-#endif
+//#ifdef HAVE_CAFFE
+//#include "caffe/caffe.hpp"
+//#endif
 
 namespace cv { namespace text {
 
diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp