From 9ae765a197d411a9016134cda0217a4a512aaabf Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Thu, 22 Jun 2017 18:31:12 +0200 Subject: [PATCH 01/31] Text detector class and Custom Image processor Class --- modules/text/CMakeLists.txt | 85 +- modules/text/FindCaffe.cmake | 14 + modules/text/FindGlog.cmake | 10 + modules/text/FindProtobuf.cmake | 10 + modules/text/FindTesseract.cmake | 24 + modules/text/README.md | 72 ++ modules/text/include/opencv2/text.hpp | 3 +- modules/text/include/opencv2/text/ocr.hpp | 849 +++++++++++++---- .../include/opencv2/text/textDetector.hpp | 235 +++++ modules/text/src/ocr_holistic.cpp | 879 ++++++++++++++++++ modules/text/src/text_detector.cpp | 643 +++++++++++++ modules/text/text_config.hpp.in | 10 +- 12 files changed, 2632 insertions(+), 202 deletions(-) create mode 100644 modules/text/FindCaffe.cmake create mode 100755 modules/text/FindGlog.cmake create mode 100644 modules/text/FindProtobuf.cmake create mode 100644 modules/text/FindTesseract.cmake create mode 100644 modules/text/include/opencv2/text/textDetector.hpp create mode 100644 modules/text/src/ocr_holistic.cpp create mode 100644 modules/text/src/text_detector.cpp diff --git a/modules/text/CMakeLists.txt b/modules/text/CMakeLists.txt index 7ec4d246451..52bd828d905 100644 --- a/modules/text/CMakeLists.txt +++ b/modules/text/CMakeLists.txt @@ -1,24 +1,71 @@ set(the_description "Text Detection and Recognition") -ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python) - -if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT) - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) - find_package(Tesseract QUIET) - if(Tesseract_FOUND) - message(STATUS "Tesseract: YES") - set(HAVE_TESSERACT 1) - ocv_include_directories(${Tesseract_INCLUDE_DIR}) - ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES}) - else() - message(STATUS "Tesseract: NO") - endif() +# Using cmake scripts and modules 
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) + +set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d) + +find_package(Caffe) +if(Caffe_FOUND) + message(STATUS "Caffe: YES") + set(HAVE_CAFFE 1) +else() + message(STATUS "Caffe: NO") +# list(APPEND TEXT_DEPS opencv_dnn) +endif() + +#internal dependencies +find_package(Protobuf) +if(Protobuf_FOUND) + message(STATUS "Protobuf: YES") + set(HAVE_PROTOBUF 1) +else() + message(STATUS "Protobuf: NO") +endif() + +find_package(Glog) +if(Glog_FOUND) + message(STATUS "Glog: YES") + set(HAVE_GLOG 1) +else() + message(STATUS "Glog: NO") +endif() + +ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d WRAP python) +#ocv_define_module(text ${TEXT_DEPS} WRAP python) + +#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) + +find_package(Tesseract) +if(${Tesseract_FOUND}) + message(STATUS "Tesseract: YES") + include_directories(${Tesseract_INCLUDE_DIR}) + target_link_libraries(opencv_text ${Tesseract_LIBS}) + add_definitions(-DHAVE_TESSERACT) +else() + message(STATUS "Tesseract: NO") endif() -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in - ${CMAKE_BINARY_DIR}/text_config.hpp @ONLY) -ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR}) -ocv_add_testdata(samples/ contrib/text - FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg" -) + +if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF) + include_directories(${Caffe_INCLUDE_DIR}) + find_package(HDF5 COMPONENTS HL REQUIRED) + include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) + find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) + include_directories(SYSTEM ${Boost_INCLUDE_DIR}) + include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ ) + 
link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64) + list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) + target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES}) + add_definitions(-DHAVE_CAFFE) +endif() #HAVE_CAFFE + +message(STATUS "TEXT CAFFE SEARCH") +if() + message(STATUS "TEXT NO CAFFE CONFLICT") +else() + message(STATUS "TEXT CAFFE CONFLICT") +endif() + diff --git a/modules/text/FindCaffe.cmake b/modules/text/FindCaffe.cmake new file mode 100644 index 00000000000..12948f62992 --- /dev/null +++ b/modules/text/FindCaffe.cmake @@ -0,0 +1,14 @@ +# Caffe package for CNN Triplet training +unset(Caffe_FOUND) + +find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp + HINTS + /usr/local/include) + +find_library(Caffe_LIBS NAMES caffe + HINTS + /usr/local/lib) + +if(Caffe_LIBS AND Caffe_INCLUDE_DIR) + set(Caffe_FOUND 1) +endif() diff --git a/modules/text/FindGlog.cmake b/modules/text/FindGlog.cmake new file mode 100755 index 00000000000..c30e9f4a6ab --- /dev/null +++ b/modules/text/FindGlog.cmake @@ -0,0 +1,10 @@ +#Required for Caffe +unset(Glog_FOUND) + +find_library(Glog_LIBS NAMES glog + HINTS + /usr/local/lib) + +if(Glog_LIBS) + set(Glog_FOUND 1) +endif() diff --git a/modules/text/FindProtobuf.cmake b/modules/text/FindProtobuf.cmake new file mode 100644 index 00000000000..6d0ad56a1f7 --- /dev/null +++ b/modules/text/FindProtobuf.cmake @@ -0,0 +1,10 @@ +#Protobuf package required for Caffe +unset(Protobuf_FOUND) + +find_library(Protobuf_LIBS NAMES protobuf + HINTS + /usr/local/lib) + +if(Protobuf_LIBS) + set(Protobuf_FOUND 1) +endif() diff --git a/modules/text/FindTesseract.cmake b/modules/text/FindTesseract.cmake new file mode 100644 index 00000000000..54c4a49297d 
--- /dev/null +++ b/modules/text/FindTesseract.cmake @@ -0,0 +1,24 @@ +# Tesseract OCR +unset(Tesseract_FOUND) + +find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h + HINTS + /usr/include + /usr/local/include) + +find_library(Tesseract_LIBRARY NAMES tesseract + HINTS + /usr/lib + /usr/local/lib) + +find_library(Lept_LIBRARY NAMES lept + HINTS + /usr/lib + /usr/local/lib) + +set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY}) +if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR) + set(Tesseract_FOUND 1) +endif() + + diff --git a/modules/text/README.md b/modules/text/README.md index bbbad11a165..3a3a897f7c3 100644 --- a/modules/text/README.md +++ b/modules/text/README.md @@ -47,3 +47,75 @@ Notes 2. Tesseract configure script may fail to detect leptonica, so you may have to edit the configure script - comment off some if's around this message and retain only "then" branch. 3. You are encouraged to search the Net for some better pre-trained classifiers, as well as classifiers for other languages. + + +Word spotting CNN +================= + +Intro +----- + +A word spotting CNN is a CNN that takes an image assumed to contain a single word and provides a probability over a given vocabulary. +Although other backends will be supported, for the moment only the Caffe backend is supported. + + + + +Installation of Caffe backend +---------------------------- +The caffe wrapping backend has the requirements caffe does. +* Caffe can be built against OpenCV, if the caffe backend is enabled, a circular dependency arises. +The simplest solution is to build caffe without support for OpenCV. +* Only the OS supported by Caffe are supported by the backend. +The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. +Other UNIX systems including OSX should be easy to adapt. 
+ +Sample script for building Caffe + +```bash +#!/bin/bash +SRCROOT="${HOME}/caffe_inst/" +mkdir -p "$SRCROOT" +cd "$SRCROOT" +git clone https://github.com/BVLC/caffe.git +cd caffe +git checkout 91b09280f5233cafc62954c98ce8bc4c204e7475 +git branch 91b09280f5233cafc62954c98ce8bc4c204e7475 +cat Makefile.config.example > Makefile.config +echo 'USE_OPENCV := 0' >> Makefile.config +echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config +echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config + + +echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 ++++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 +@@ -234,6 +234,7 @@ + + template + friend class Net; ++ virtual ~Callback(){} + }; + const vector& before_forward() const { return before_forward_; } + void add_before_forward(Callback* value) { +">/tmp/cleanup_caffe.diff + +patch < /tmp/cleanup_caffe.diff + + +make -j 6 + +make pycaffe + +make distribute +``` + + +```bash +#!/bin/bash +cd $OPENCV_BUILD_DIR #You must set this +CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04 + +cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./ + + +``` diff --git a/modules/text/include/opencv2/text.hpp b/modules/text/include/opencv2/text.hpp index 945194a16b6..c4c2975b8dd 100644 --- a/modules/text/include/opencv2/text.hpp +++ b/modules/text/include/opencv2/text.hpp @@ -41,6 +41,7 @@ the use of this software, even if 
advised of the possibility of such damage. #include "opencv2/text/erfilter.hpp" #include "opencv2/text/ocr.hpp" +#include "opencv2/text/textDetector.hpp" /** @defgroup text Scene Text Detection and Recognition @@ -92,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D in [Gomez13][Gomez14] for grouping arbitrary oriented text (see erGrouping). To see the text detector at work, have a look at the textdetection demo: - + @defgroup text_recognize Scene Text Recognition @} diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 1261046cd07..9fc5403fdef 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -46,6 +46,10 @@ #include #include +#include +#include + + namespace cv { @@ -61,82 +65,126 @@ enum OCR_LEVEL_TEXTLINE }; -//base class BaseOCR declares a common API that would be used in a typical text recognition scenario +//base class BaseOCR declares a common API that would be used in a typical text +//recognition scenario class CV_EXPORTS_W BaseOCR { -public: + public: virtual ~BaseOCR() {}; - virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + + virtual void run(Mat& image, std::string& output_text, + std::vector* component_rects=NULL, + std::vector* component_texts=NULL, + std::vector* component_confidences=NULL, int component_level=0) = 0; - virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + + virtual void run(Mat& image, Mat& mask, std::string& output_text, + std::vector* component_rects=NULL, + std::vector* component_texts=NULL, + std::vector* component_confidences=NULL, int component_level=0) = 0; + + /** @brief Main functionality of the OCR Hierarchy. 
Subclasses provide + * default parameters for all parameters other than the input image. + */ + virtual String run(InputArray image){ + std::string res; + std::vector component_rects; + std::vector component_confidences; + std::vector component_texts; + Mat inputImage=image.getMat(); + this->run(inputImage,res,&component_rects,&component_texts, + &component_confidences,OCR_LEVEL_WORD); + return res; + } + }; -/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++. +/** @brief OCRTesseract class provides an interface with the tesseract-ocr API + * (v3.02.02) in C++. Notice that it is compiled only when tesseract-ocr is correctly installed. @note - - (C++) An example of OCRTesseract recognition combined with scene text detection can be found - at the end_to_end_recognition demo: - - - (C++) Another example of OCRTesseract recognition combined with scene text detection can be - found at the webcam_demo: - + - (C++) An example of OCRTesseract recognition combined with scene text + detection can be found at the end_to_end_recognition demo: + + - (C++) Another example of OCRTesseract recognition combined with scene + text detection can be found at the webcam_demo: + */ class CV_EXPORTS_W OCRTesseract : public BaseOCR { public: /** @brief Recognize text using the tesseract-ocr API. - Takes image on input and returns recognized text in the output_text parameter. Optionally - provides also the Rects for individual text elements found (e.g. words), and the list of those - text elements with their confidence values. + Takes image on input and returns recognized text in the output_text + parameter. Optionally provides also the Rects for individual text elements + found (e.g. words), and the list of those text elements with their + confidence values. @param image Input image CV_8UC1 or CV_8UC3 + @param output_text Output text of the tesseract-ocr. 
- @param component_rects If provided the method will output a list of Rects for the individual - text elements found (e.g. words or text lines). - @param component_texts If provided the method will output a list of text strings for the - recognition of individual text elements found (e.g. words or text lines). - @param component_confidences If provided the method will output a list of confidence values - for the recognition of individual text elements found (e.g. words or text lines). + + @param component_rects If provided the method will output a list of Rects + for the individual text elements found (e.g. words or text lines). + + @param component_texts If provided the method will output a list of text + strings for the recognition of individual text elements found (e.g. words or + text lines). + + @param component_confidences If provided the method will output a list of + confidence values for the recognition of individual text elements found + (e.g. words or text lines). + @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE. 
*/ - virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + virtual void run (Mat& image, std::string& output_text, + std::vector* component_rects=NULL, + std::vector* component_texts=NULL, + std::vector* component_confidences=NULL, int component_level=0); - virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=0); + virtual void run (Mat& image, Mat& mask, std::string& output_text, + std::vector* component_rects=NULL, + std::vector* component_texts=NULL, + std::vector* component_confidences=NULL, + int component_level=0); // aliases for scripting - CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); + CV_WRAP String run (InputArray image, int min_confidence, + int component_level=0); - CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); + CV_WRAP String run(InputArray image, InputArray mask, + int min_confidence, int component_level=0); CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0; - /** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract. + /** @brief Creates an instance of the OCRTesseract class. Initializes + * Tesseract. + + * @param datapath the name of the parent directory of tessdata ended with + * "/", or NULL to use the system's default directory. + + * @param language an ISO 639-3 code or NULL will default to "eng". + + * @param char_whitelist specifies the list of characters used for + * recognition. NULL defaults to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ". - @param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the - system's default directory. - @param language an ISO 639-3 code or NULL will default to "eng". 
- @param char_whitelist specifies the list of characters used for recognition. NULL defaults to - "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ". - @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by deffault - tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible - values. - @param psmode tesseract-ocr offers different Page Segmentation Modes (PSM) tesseract::PSM_AUTO - (fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other - possible values. + * @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by + * default tesseract::OEM_DEFAULT is used. See the tesseract-ocr API + * documentation for other possible values. + + * @param psmode tesseract-ocr offers different Page Segmentation Modes + * (PSM) tesseract::PSM_AUTO (fully automatic layout analysis) is used. See + * the tesseract-ocr API documentation for other possible values. */ - CV_WRAP static Ptr create(const char* datapath=NULL, const char* language=NULL, - const char* char_whitelist=NULL, int oem=3, int psmode=3); + CV_WRAP static Ptr create (const char* datapath=NULL, + const char* language=NULL, + const char* char_whitelist=NULL, + int oem=3, int psmode=3); }; @@ -147,134 +195,156 @@ enum decoder_mode OCR_DECODER_VITERBI = 0 // Other algorithms may be added }; -/** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models. +/** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov + * Models. 
-@note - - (C++) An example on using OCRHMMDecoder recognition combined with scene text detection can - be found at the webcam_demo sample: - + * @note + * - (C++) An example on using OCRHMMDecoder recognition combined with scene + * text detection can be found at the webcam_demo sample: + * */ -class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR -{ -public: +class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { + public: /** @brief Callback with the character classifier is made a class. - This way it hides the feature extractor and the classifier itself, so developers can write - their own OCR code. + * This way it hides the feature extractor and the classifier itself, so + * developers can write their own OCR code. - The default character classifier and feature extractor can be loaded using the utility funtion - loadOCRHMMClassifierNM and KNN model provided in - . - */ - class CV_EXPORTS_W ClassifierCallback - { - public: + * The default character classifier and feature extractor can be loaded using + * the utility funtion loadOCRHMMClassifierNM and KNN model provided in + * . + */ + class CV_EXPORTS_W ClassifierCallback{ + public: virtual ~ClassifierCallback() { } - /** @brief The character classifier must return a (ranked list of) class(es) id('s) + /** @brief The character classifier must return a (ranked list of) + * class(es) id('s) - @param image Input image CV_8UC1 or CV_8UC3 with a single letter. - @param out_class The classifier returns the character class categorical label, or list of - class labels, to which the input image corresponds. - @param out_confidence The classifier returns the probability of the input image - corresponding to each classes in out_class. + * @param image Input image CV_8UC1 or CV_8UC3 with a single letter. + * @param out_class The classifier returns the character class + * categorical label, or list of class labels, to which the input image + * corresponds. 
+ + * @param out_confidence The classifier returns the probability of the + * input image corresponding to each classes in out_class. */ - virtual void eval( InputArray image, std::vector& out_class, std::vector& out_confidence); + virtual void eval (InputArray image, std::vector& out_class, + std::vector& out_confidence); }; -public: /** @brief Recognize text using HMM. - Takes binary image on input and returns recognized text in the output_text parameter. Optionally - provides also the Rects for individual text elements found (e.g. words), and the list of those - text elements with their confidence values. + * Takes binary image on input and returns recognized text in the output_text + * parameter. Optionally provides also the Rects for individual text elements + * found (e.g. words), and the list of those text elements with their + * confidence values. - @param image Input binary image CV_8UC1 with a single text line (or word). + * @param image Input binary image CV_8UC1 with a single text line (or word). - @param output_text Output text. Most likely character sequence found by the HMM decoder. + * @param output_text Output text. Most likely character sequence found by + * the HMM decoder. - @param component_rects If provided the method will output a list of Rects for the individual - text elements found (e.g. words). + * @param component_rects If provided the method will output a list of Rects + * for the individual text elements found (e.g. words). - @param component_texts If provided the method will output a list of text strings for the - recognition of individual text elements found (e.g. words). + * @param component_texts If provided the method will output a list of text + * strings for the recognition of individual text elements found (e.g. words) + * . - @param component_confidences If provided the method will output a list of confidence values - for the recognition of individual text elements found (e.g. words). 
+ * @param component_confidences If provided the method will output a list of + * confidence values for the recognition of individual text elements found + * (e.g. words). - @param component_level Only OCR_LEVEL_WORD is supported. - */ - virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=0); + * @param component_level Only OCR_LEVEL_WORD is supported. + */ + virtual void run (Mat& image, std::string& output_text, + std::vector* component_rects=NULL, + std::vector* component_texts=NULL, + std::vector* component_confidences=NULL, + int component_level=0); /** @brief Recognize text using HMM. - Takes an image and a mask (where each connected component corresponds to a segmented character) - on input and returns recognized text in the output_text parameter. Optionally - provides also the Rects for individual text elements found (e.g. words), and the list of those - text elements with their confidence values. + * Takes an image and a mask (where each connected component corresponds to a + * segmented character) on input and returns recognized text in the + * output_text parameter. Optionally provides also the Rects for individual + * text elements found (e.g. words), and the list of those text elements with + * their confidence values. - @param image Input image CV_8UC1 or CV_8UC3 with a single text line (or word). - @param mask Input binary image CV_8UC1 same size as input image. Each connected component in mask corresponds to a segmented character in the input image. + * @param image Input image CV_8UC1 or CV_8UC3 with a single text line + * (or word). - @param output_text Output text. Most likely character sequence found by the HMM decoder. + * @param mask Input binary image CV_8UC1 same size as input image. Each + * connected component in mask corresponds to a segmented character in the + * input image. 
- @param component_rects If provided the method will output a list of Rects for the individual - text elements found (e.g. words). + * @param output_text Output text. Most likely character sequence found by + * the HMM decoder. - @param component_texts If provided the method will output a list of text strings for the - recognition of individual text elements found (e.g. words). + * @param component_rects If provided the method will output a list of Rects + * for the individual text elements found (e.g. words). - @param component_confidences If provided the method will output a list of confidence values - for the recognition of individual text elements found (e.g. words). + * @param component_texts If provided the method will output a list of text + * strings for the recognition of individual text elements found (e.g. words) + * . - @param component_level Only OCR_LEVEL_WORD is supported. - */ - virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + * @param component_confidences If provided the method will output a list of + * confidence values for the recognition of individual text elements found + * (e.g. words). + + * @param component_level Only OCR_LEVEL_WORD is supported. + */ + virtual void run(Mat& image, Mat& mask, std::string& output_text, + std::vector* component_rects=NULL, + std::vector* component_texts=NULL, + std::vector* component_confidences=NULL, int component_level=0); // aliases for scripting - CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); + CV_WRAP String run(InputArray image, + int min_confidence, + int component_level=0); - CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); + CV_WRAP String run(InputArray image, + InputArray mask, + int min_confidence, + int component_level=0); - /** @brief Creates an instance of the OCRHMMDecoder class. 
Initializes HMMDecoder. + /** @brief Creates an instance of the OCRHMMDecoder class. Initializes + * HMMDecoder. - @param classifier The character classifier with built in feature extractor. + * @param classifier The character classifier with built in feature + * extractor. - @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size() - must be equal to the number of classes of the classifier. + * @param vocabulary The language vocabulary (chars when ascii english text) + * . vocabulary.size() must be equal to the number of classes of the + * classifier. - @param transition_probabilities_table Table with transition probabilities between character - pairs. cols == rows == vocabulary.size(). + * @param transition_probabilities_table Table with transition probabilities + * between character pairs. cols == rows == vocabulary.size(). - @param emission_probabilities_table Table with observation emission probabilities. cols == - rows == vocabulary.size(). + * @param emission_probabilities_table Table with observation emission + * probabilities. cols == rows == vocabulary.size(). - @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment - (). + * @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available + * for the moment (). 
*/ - static Ptr create(const Ptr classifier,// The character classifier with built in feature extractor - const std::string& vocabulary, // The language vocabulary (chars when ascii english text) - // size() must be equal to the number of classes - InputArray transition_probabilities_table, // Table with transition probabilities between character pairs - // cols == rows == vocabulari.size() - InputArray emission_probabilities_table, // Table with observation emission probabilities - // cols == rows == vocabulari.size() - decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) - - CV_WRAP static Ptr create(const Ptr classifier,// The character classifier with built in feature extractor - const String& vocabulary, // The language vocabulary (chars when ascii english text) - // size() must be equal to the number of classes - InputArray transition_probabilities_table, // Table with transition probabilities between character pairs - // cols == rows == vocabulari.size() - InputArray emission_probabilities_table, // Table with observation emission probabilities - // cols == rows == vocabulari.size() - int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) - -protected: + static Ptr create( + const Ptr classifier, // The character classifier with built in feature extractor + const std::string& vocabulary, // The language vocabulary (chars when ascii english text) size() must be equal to the number of classes + InputArray transition_probabilities_table, // Table with transition probabilities between character pairs cols == rows == vocabulari.size() + InputArray emission_probabilities_table, // Table with observation emission probabilities cols == rows == vocabulari.size() + decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) + + CV_WRAP static Ptr create( + const Ptr classifier, // The character classifier with built in feature extractor + const String& 
vocabulary, // The language vocabulary (chars when ascii english text) size() must be equal to the number of classes + InputArray transition_probabilities_table, // Table with transition probabilities between character pairs cols == rows == vocabulari.size() + InputArray emission_probabilities_table, // Table with observation emission probabilities cols == rows == vocabulari.size() + int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) + + protected: Ptr classifier; std::string vocabulary; @@ -283,76 +353,98 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR decoder_mode mode; }; -/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. +/** @brief Allow to implicitly load the default character classifier when + * creating an OCRHMMDecoder object. -@param filename The XML or YAML file with the classifier model (e.g. OCRHMM_knn_model_data.xml) + * @param filename The XML or YAML file with the classifier model (e.g. + * OCRHMM_knn_model_data.xml) -The KNN default classifier is based in the scene text recognition method proposed by Lukás Neumann & -Jiri Matas in [Neumann11b]. Basically, the region (contour) in the input image is normalized to a -fixed size, while retaining the centroid and aspect ratio, in order to extract a feature vector -based on gradient orientations along the chain-code of its perimeter. Then, the region is classified -using a KNN model trained with synthetic data of rendered characters with different standard font -types. + * The KNN default classifier is based in the scene text recognition method + * proposed by Lukás Neumann & Jiri Matas in [Neumann11b]. Basically, the region + * (contour) in the input image is normalized to a fixed size, while retaining + * the centroid and aspect ratio, in order to extract a feature vector based on + * gradient orientations along the chain-code of its perimeter. 
Then, the region + * is classified using a KNN model trained with synthetic data of rendered + * characters with different standard font types. */ +CV_EXPORTS_W Ptr loadOCRHMMClassifierNM ( + const String& filename); -CV_EXPORTS_W Ptr loadOCRHMMClassifierNM(const String& filename); +/** @brief Allow to implicitly load the default character classifier when + * creating an OCRHMMDecoder object. -/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. + * @param filename The XML or YAML file with the classifier model (e.g. + * OCRBeamSearch_CNN_model_data.xml.gz) -@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz) - -The CNN default classifier is based in the scene text recognition method proposed by Adam Coates & -Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Convolutional Neural Network and -a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions -at each window location. + * The CNN default classifier is based in the scene text recognition method + * proposed by Adam Coates & Andrew NG in [Coates11a]. The character classifier + * consists in a Single Layer Convolutional Neural Network and a linear + * classifier. It is applied to the input image in a sliding window fashion, + * providing a set of recognitions at each window location. */ -CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN(const String& filename); +CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN ( + const String& filename); //! @} -/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon). - * +/** @brief Utility function to create a tailored language model transitions + * table from a given list of words (lexicon). + * @param vocabulary The language vocabulary (chars when ascii english text). 
- * + * @param lexicon The list of words that are expected to be found in a particular image. - * - * @param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size(). - * - * The function calculate frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods. + + * @param transition_probabilities_table Output table with transition + * probabilities between character pairs. cols == rows == vocabulary.size(). + + * The function calculate frequency statistics of character pairs from the given + * lexicon and fills the output transition_probabilities_table with them. The + * transition_probabilities_table can be used as input in the + * OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods. * @note - * - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) : - * + * - (C++) An alternative would be to load the default generic language + * transition table provided in the text module samples folder (created + * from ispell 42869 english words list) : + * **/ -CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector& lexicon, OutputArray transition_probabilities_table); - -CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector& lexicon); +CV_EXPORTS void createOCRHMMTransitionsTable ( + std::string& vocabulary, std::vector& lexicon, + OutputArray transition_probabilities_table); +CV_EXPORTS_W Mat createOCRHMMTransitionsTable ( + const String& vocabulary, std::vector& lexicon); /* OCR BeamSearch Decoder */ -/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm. 
+/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam + * Search algorithm. @note - - (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can - be found at the demo sample: - + - (C++) An example on using OCRBeamSearchDecoder recognition combined with + scene text detection can be found at the demo sample: + */ -class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR -{ -public: + + +/* Forward declaration of class that can be used to generate an OCRBeamSearchDecoder::ClassifierCallbac */ +class TextImageClassifier; + +class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ + + public: /** @brief Callback with the character classifier is made a class. - This way it hides the feature extractor and the classifier itself, so developers can write - their own OCR code. + * This way it hides the feature extractor and the classifier itself, so + * developers can write their own OCR code. - The default character classifier and feature extractor can be loaded using the utility funtion - loadOCRBeamSearchClassifierCNN with all its parameters provided in - . + * The default character classifier and feature extractor can be loaded + * using the utility funtion loadOCRBeamSearchClassifierCNN with all its + * parameters provided in + * . 
*/ - class CV_EXPORTS_W ClassifierCallback - { - public: + class CV_EXPORTS_W ClassifierCallback{ + public: virtual ~ClassifierCallback() { } /** @brief The character classifier must return a (ranked list of) class(es) id('s) @@ -364,8 +456,8 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR */ virtual void eval( InputArray image, std::vector< std::vector >& recognition_probabilities, std::vector& oversegmentation ); - int getWindowSize() {return 0;} - int getStepSize() {return 0;} + virtual int getWindowSize() {return 0;} + virtual int getStepSize() {return 0;} }; public: @@ -421,6 +513,7 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR @param beam_size Size of the beam in Beam Search algorithm. */ + static Ptr create(const Ptr classifier,// The character classifier with built in feature extractor const std::string& vocabulary, // The language vocabulary (chars when ascii english text) // size() must be equal to the number of classes @@ -441,6 +534,44 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int beam_size = 500); // Size of the beam in Beam Search algorithm + /** @brief This method allows to plug a classifier that is derivative of TextImageClassifier in to + * OCRBeamSearchDecoder as a ClassifierCallback. + + @param classifier A pointer to a TextImageClassifier decendent + + @param alphabet The language alphabet one char per symbol. alphabet.size() must be equal to the number of classes + of the classifier. In future editinons it should be replaced with a vector of strings. + + @param transition_probabilities_table Table with transition probabilities between character + pairs. cols == rows == alphabet.size(). + + @param emission_probabilities_table Table with observation emission probabilities. cols == + rows == alphabet.size(). + + @param windowWidth The width of the windows to which the sliding window will be iterated. 
The height will + be the height of the image. The windows might be resized to fit the classifiers input by the classifiers + preprocessor. + + @param windowStep The step for the sliding window + + @param mode HMM Decoding algorithm (only Viterbi for the moment) + + @param beam_size Size of the beam in Beam Search algorithm + */ +// CV_WRAP static Ptr create(const Ptr classifier, // The character classifier with built in feature extractor +// String alphabet, // The language alphabet one char per symbol +// // size() must be equal to the number of classes +// InputArray transition_probabilities_table, // Table with transition probabilities between character pairs +// // cols == rows == alphabet.size() +// InputArray emission_probabilities_table, // Table with observation emission probabilities +// // cols == rows == alphabet.size() +// int windowWidth, // The width of the windows to which the sliding window will be iterated. +// // The height will be the height of the image. The windows might be resized to +// // fit the classifiers input by the classifiers preprocessor +// int windowStep = 1 , // The step for the sliding window +// int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) +// int beam_size = 500); // Size of the beam in Beam Search algorithm + protected: Ptr classifier; @@ -465,6 +596,364 @@ CV_EXPORTS_W Ptr loadOCRBeamSearchClas //! 
@} -} -} + +//Classifiers should provide diferent backends +//For the moment only caffe is implemeted +enum{ + OCR_HOLISTIC_BACKEND_NONE, + OCR_HOLISTIC_BACKEND_CAFFE +}; + +class TextImageClassifier; + +/** + * @brief The ImagePreprocessor class + */ +class CV_EXPORTS_W ImagePreprocessor{ +protected: + virtual void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels)=0; + virtual void set_mean_(Mat){} + +public: + virtual ~ImagePreprocessor(){} + + /** @brief this method in provides public acces to the preprocessing with respect to a specific + * classifier + * + * This method's main use would be to use the preprocessor without feeding it to a classifier. + * Determining the exact behavior of a preprocessor is the main motivation for this. + * + * @param input an image without any constraints + * + * @param output in most cases an image of fixed depth size and whitened + * + * @param sz the size to which the image would be resize if the preprocessor resizes inputs + * + * @param outputChannels the number of channels for the output image + */ + CV_WRAP void preprocess(InputArray input,OutputArray output,Size sz,int outputChannels); + + CV_WRAP void set_mean(Mat mean); + + /** @brief Creates a functor that only resizes and changes the channels of the input + * without further processing. + * + * @return shared pointer to the generated preprocessor + */ + CV_WRAP static Ptr createResizer(); + + /** @brief + * + * @param sigma + * + * @return shared pointer to generated preprocessor + */ + CV_WRAP static Ptr createImageStandarizer(double sigma); + + /** @brief + * + * @return shared pointer to generated preprocessor + */ + CV_WRAP static Ptr createImageMeanSubtractor(InputArray meanImg); + + CV_WRAP static PtrcreateImageCustomPreprocessor(double rawval=1.0,String channel_order="BGR"); + + friend class TextImageClassifier; + +}; + +/** @brief Abstract class that implements the classifcation of text images. 
+ * + * The interface is generic enough to describe any image classifier. And allows + * to take advantage of compouting in batches. While word classifiers are the default + * networks, any image classifers should work. + * + */ +class CV_EXPORTS_W TextImageClassifier +{ +protected: + Size inputGeometry_; + Size outputGeometry_; + int channelCount_; + Ptr preprocessor_; + /** @brief all image preprocessing is handled here including whitening etc. + * + * @param input the image to be preprocessed for the classifier. If the depth + * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1] + * + * @param output reference to the image to be fed to the classifier, the preprocessor will + * resize the image to the apropriate size and convert it to the apropriate depth\ + * + * The method preprocess should never be used externally, it is up to classify and classifyBatch + * methods to employ it. + */ + virtual void preprocess(const Mat& input,Mat& output); +public: + virtual ~TextImageClassifier() {} + + /** @brief + */ + CV_WRAP virtual void setPreprocessor(Ptr ptr); + + /** @brief + */ + CV_WRAP Ptr getPreprocessor(); + + /** @brief produces a class confidence row-vector given an image + */ + CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0; + /** @brief produces a list of bounding box given an image + */ + + CV_WRAP virtual void detect(InputArray image, OutputArray classProbabilities) = 0; + + /** @brief produces a matrix containing class confidence row-vectors given an collection of images + */ + CV_WRAP virtual void classifyBatch(InputArrayOfArrays image, OutputArray classProbabilities) = 0; + + /** @brief simple getter method returning the number of channels each input sample has + */ + CV_WRAP virtual int getInputChannelCount(){return this->channelCount_;} + + /** @brief simple getter method returning the size of the input sample + */ + CV_WRAP virtual Size getInputSize(){return this->inputGeometry_;} + + 
/** @brief simple getter method returning the size of the oputput row-vector + */ + CV_WRAP virtual int getOutputSize()=0; + CV_WRAP virtual Size getOutputGeometry()=0; + + /** @brief simple getter method returning the size of the minibatches for this classifier. + * If not applicabe this method should return 1 + */ + CV_WRAP virtual int getMinibatchSize()=0; + + friend class ImagePreprocessor; +}; + + + +class CV_EXPORTS_W DeepCNN:public TextImageClassifier +{ + /** @brief Class that uses a pretrained caffe model for word classification. + * + * This network is described in detail in: + * Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015 + * http://arxiv.org/abs/1412.1842 + */ +public: + virtual ~DeepCNN() {}; + + /** @brief Constructs a DeepCNN object from a caffe pretrained model + * + * @param archFilename is the path to the prototxt file containing the deployment model architecture description. + * + * @param weightsFilename is the path to the pretrained weights of the model in binary fdorm. This file can be + * very large, up to 2GB. + * + * @param preprocessor is a pointer to the instance of a ImagePreprocessor implementing the preprocess_ protecteed method; + * + * @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter + * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU. + * + * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is + * the only option + */ + CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE); + + /** @brief Constructs a DeepCNN intended to be used for word spotting. + * + * This method loads a pretrained classifier and couples him with a preprocessor that standarises pixels with a + * deviation of 113. 
The architecture file can be downloaded from: + * + * While the weights can be downloaded from: + * + * The words assigned to the network outputs are available at: + * + * + * @param archFilename is the path to the prototxt file containing the deployment model architecture description. + * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt". + * + * @param weightsFilename is the path to the pretrained weights of the model. When employing + * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. This file can be very large, the + * pretrained DictNet uses 2GB. + * + * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is + * the only option + */ + CV_WRAP static Ptr createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE); + +}; + +namespace cnn_config{ +namespace caffe_backend{ + +/** @brief Prompts Caffe on the computation device beeing used + * + * Caffe can only be controlled globally on whether the GPU or the CPU is used has a + * global behavior. This function queries the current state of caffe. + * If the module is built without caffe, this method throws an exception. + * + * @return true if caffe is computing on the GPU, false if caffe is computing on the CPU + */ +CV_EXPORTS_W bool getCaffeGpuMode(); + +/** @brief Sets the computation device beeing used by Caffe + * + * Caffe can only be controlled globally on whether the GPU or the CPU is used has a + * global behavior. This function queries the current state of caffe. + * If the module is built without caffe, this method throws an exception. + * + * @param useGpu set to true for caffe to be computing on the GPU, false if caffe is + * computing on the CPU + */ +CV_EXPORTS_W void setCaffeGpuMode(bool useGpu); + +/** @brief Provides runtime information on whether Caffe support was compiled in. 
+ * + * The text module API is the same regardless of whether CAffe was available or not + * During compilation. When methods that require Caffe are invocked while Caffe support + * is not compiled in, exceptions are thrown. This method allows to test whether the + * text module was built with caffe during runtime. + * + * @return true if Caffe support for the the text module was provided during compilation, + * false if Caffe was unavailable. + */ +CV_EXPORTS_W bool getCaffeAvailable(); + +}//caffe +}//cnn_config + +/** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting. + * Given a predefined vocabulary , a TextImageClassifier is employed to select the most probable + * word given an input image. + * + * This class implements the logic of providing transcriptions given a vocabulary and and an image + * classifer. The classifier has to be any TextImageClassifier but the classifier for which this + * class was built is the DictNet. In order to load it the following files should be downloaded: + + * + * + * + */ +class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR +{ +public: + virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + int component_level=OCR_LEVEL_WORD)=0; + + /** @brief Recognize text using a segmentation based word-spotting/classifier cnn. + + Takes image on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. + + @param image Input image CV_8UC1 or CV_8UC3 + + @param mask is totally ignored and is only available for compatibillity reasons + + @param output_text Output text of the the word spoting, always one that exists in the dictionary. 
+ + @param component_rects Not applicable for word spotting can be be NULL if not, a single elemnt will + be put in the vector. + + @param component_texts Not applicable for word spotting can be be NULL if not, a single elemnt will + be put in the vector. + + @param component_confidences Not applicable for word spotting can be be NULL if not, a single elemnt will + be put in the vector. + + @param component_level must be OCR_LEVEL_WORD. + */ + + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + int component_level=OCR_LEVEL_WORD)=0; + + + /** + @brief Method that provides a quick and simple interface to a single word image classifcation + + @param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size assumed to contain a single word + + @param transcription an opencv string that will store the detected word transcription + + @param confidence a double that will be updated with the confidence the classifier has for the selected word + */ + CV_WRAP virtual void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)=0; + + /** + @brief Method that provides a quick and simple interface to a multiple word image classifcation taking advantage + the classifiers parallel capabilities. + + @param inputImageList an list of images expected to be a CV_U8C1 or CV_U8C3 each image can be of any size and is assumed + to contain a single word. + + @param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each + input image + + @param confidences a vector of double that will be updated with the confidence the classifier has for each of the + selected words. 
+ */ + CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector& transcriptions,CV_OUT std::vector& confidences)=0; + + + /** + @brief simple getter for the vocabulary employed + */ + CV_WRAP virtual const std::vector& getVocabulary()=0; + + /** @brief simple getter for the preprocessing functor + */ + CV_WRAP virtual Ptr getClassifier()=0; + + /** @brief Creates an instance of the OCRHolisticWordRecognizer class. + + @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance + + @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line + in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize + of the classifier. + */ + CV_WRAP static Ptr create(Ptr classifierPtr,String vocabularyFilename); + + + /** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier. + + @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. + + @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. + + @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line + in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize + of the classifier. 
+ */ + CV_WRAP static Ptr create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename); + + /** @brief + * + * @param classifierPtr + * + * @param vocabulary + */ + CV_WRAP static Ptr create(Ptr classifierPtr,const std::vector& vocabulary); + + /** @brief + * + * @param modelArchFilename + * + * @param modelWeightsFilename + * + * @param vocabulary + */ + CV_WRAP static Ptr create (String modelArchFilename, String modelWeightsFilename, const std::vector& vocabulary); +}; + + +}//namespace text +}//namespace cv + + #endif // _OPENCV_TEXT_OCR_HPP_ diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp new file mode 100644 index 00000000000..262795733d9 --- /dev/null +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -0,0 +1,235 @@ +/*M////////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_TEXT_TEXTDETECTOR_HPP__ +#define __OPENCV_TEXT_TEXTDETECTOR_HPP__ + +#include +#include +#include +#include +#include"ocr.hpp" + + +namespace cv +{ +namespace text +{ + +//! @addtogroup text_recognize +//! @{ + + + +//base class BaseDetector declares a common API that would be used in a typical text +//recognition scenario +class CV_EXPORTS_W BaseDetector +{ + public: + virtual ~BaseDetector() {}; + + virtual void run(Mat& image, + std::vector* component_rects=NULL, + std::vector* component_confidences=NULL, + int component_level=0) = 0; + + virtual void run(Mat& image, Mat& mask, + std::vector* component_rects=NULL, + std::vector* component_confidences=NULL, + int component_level=0) = 0; + + /** @brief Main functionality of the OCR Hierarchy. 
Subclasses provide + * default parameters for all parameters other than the input image. + */ +// virtual std::vector* run(InputArray image){ +// //std::string res; +// std::vector component_rects; +// std::vector component_confidences; +// //std::vector component_texts; +// Mat inputImage=image.getMat(); +// this->run(inputImage,&component_rects, +// &component_confidences,OCR_LEVEL_WORD); +// return *component_rects; +// } + +}; + + +//Classifiers should provide diferent backends +//For the moment only caffe is implemeted +//enum{ +// OCR_HOLISTIC_BACKEND_NONE, +// OCR_HOLISTIC_BACKEND_CAFFE +//}; + + + + + +/** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting. + * Given a predefined vocabulary , a TextImageClassifier is employed to select the most probable + * word given an input image. + * + * This class implements the logic of providing transcriptions given a vocabulary and and an image + * classifer. The classifier has to be any TextImageClassifier but the classifier for which this + * class was built is the DictNet. In order to load it the following files should be downloaded: + + * + * + * + */ +class CV_EXPORTS_W textDetector : public BaseDetector +{ +public: + virtual void run(Mat& image, std::vector* component_rects=NULL, + std::vector* component_confidences=NULL, + int component_level=OCR_LEVEL_WORD)=0; + + /** @brief Recognize text using a segmentation based word-spotting/classifier cnn. + + Takes image on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. + + @param image Input image CV_8UC1 or CV_8UC3 + + @param mask is totally ignored and is only available for compatibillity reasons + + @param output_text Output text of the the word spoting, always one that exists in the dictionary. 
+ + @param component_rects Not applicable for word spotting can be be NULL if not, a single elemnt will + be put in the vector. + + @param component_texts Not applicable for word spotting can be be NULL if not, a single elemnt will + be put in the vector. + + @param component_confidences Not applicable for word spotting can be be NULL if not, a single elemnt will + be put in the vector. + + @param component_level must be OCR_LEVEL_WORD. + */ + + virtual void run(Mat& image, Mat& mask, std::vector* component_rects=NULL, + std::vector* component_confidences=NULL, + int component_level=OCR_LEVEL_WORD)=0; + + + /** + @brief Method that provides a quick and simple interface to a single word image classifcation + + @param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size + + @param transcription an opencv string that will store the detected word transcription + + @param confidence a double that will be updated with the confidence the classifier has for the selected word + */ + CV_WRAP virtual void textDetectInImage(InputArray inputImage,CV_OUT std::vector& Bbox,CV_OUT std::vector& confidence)=0; + + /** + @brief Method that provides a quick and simple interface to a multiple word image classifcation taking advantage + the classifiers parallel capabilities. + + @param inputImageList an list of images expected to be a CV_U8C1 or CV_U8C3 each image can be of any size and is assumed + to contain a single word. + + @param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each + input image + + @param confidences a vector of double that will be updated with the confidence the classifier has for each of the + selected words. 
+ */ + //CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector& transcriptions,CV_OUT std::vector& confidences)=0; + + + /** @brief simple getter for the preprocessing functor + */ + CV_WRAP virtual Ptr getClassifier()=0; + + /** @brief Creates an instance of the OCRHolisticWordRecognizer class. + + @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance + + @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line + in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize + of the classifier. + */ + CV_WRAP static Ptr create(Ptr classifierPtr); + + + /** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier. + + @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. + + @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. + + @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line + in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize + of the classifier. 
+ */ + CV_WRAP static Ptr create(String modelArchFilename, String modelWeightsFilename); + + /** @brief + * + * @param classifierPtr + * + * @param vocabulary + */ + // CV_WRAP static Ptr create(Ptr classifierPtr,const std::vector& vocabulary); + + /** @brief + * + * @param modelArchFilename + * + * @param modelWeightsFilename + * + * @param vocabulary + */ + // CV_WRAP static Ptr create (String modelArchFilename, String modelWeightsFilename, const std::vector& vocabulary); +}; + + +}//namespace text +}//namespace cv + + +#endif // _OPENCV_TEXT_OCR_HPP_ diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp new file mode 100644 index 00000000000..9791e62bbf5 --- /dev/null +++ b/modules/text/src/ocr_holistic.cpp @@ -0,0 +1,879 @@ +#include "precomp.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/core.hpp" + + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef HAVE_CAFFE +#include "caffe/caffe.hpp" +#endif + +namespace cv { namespace text { + +//Maybe OpenCV has a routine better suited +inline bool fileExists (String filename) { + std::ifstream f(filename.c_str()); + return f.good(); +} + +//************************************************************************************ +//****************** ImagePreprocessor ******************************************* +//************************************************************************************ + +void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){ + Mat inpImg=input.getMat(); + Mat outImg; + this->preprocess_(inpImg,outImg,sz,outputChannels); + outImg.copyTo(output); +} +void ImagePreprocessor::set_mean(Mat mean){ + + + this->set_mean_(mean); + +} + + +class ResizerPreprocessor: public ImagePreprocessor{ +protected: + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of channel and 
depth conversions in ImageProcessor class + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1){ + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC1); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC3); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC1); + } + }else + { + if(input.depth()==CV_8U){ + input.convertTo(output, CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC3); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + } + //void set_mean_(Mat m){} +public: + ResizerPreprocessor(){} + ~ResizerPreprocessor(){} +}; + +class StandarizerPreprocessor: public ImagePreprocessor{ +protected: + double sigma_; + //void set_mean_(Mat M){} + + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of channel and depth conversions in ImageProcessor class + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1) + { + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + 
tmpInput.convertTo(output, CV_32FC1); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC3); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC1); + } + }else + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC3); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + + Scalar mean,dev; + meanStdDev(output,mean,dev); + subtract(output,mean[0],output); + divide(output,(dev[0]/sigma_),output); + } +public: + StandarizerPreprocessor(double sigma):sigma_(sigma){} + ~StandarizerPreprocessor(){} + +}; + +class customPreprocessor:public ImagePreprocessor{ +protected: + + double rawval_; + Mat mean_; + String channel_order_; + + void set_mean_(Mat imMean_){ + + imMean_.copyTo(this->mean_); + + + } + + void set_raw_scale(int rawval){ + rawval_ = rawval; + + } + void set_channels(String channel_order){ + channel_order_=channel_order; + } + + + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of channel and depth conversions in ImageProcessor class + + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1) + { + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + if (rawval_ == 1) + tmpInput.convertTo(output,CV_32FC3,1/255.0); + else + tmpInput.convertTo(output,CV_32FC1); + }else + {//Assuming values are at the desired [0,1] range + if (rawval_ 
==1) + tmpInput.convertTo(output, CV_32FC1); + else + tmpInput.convertTo(output, CV_32FC1,rawval_); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + if (rawval_ == 1) + tmpInput.convertTo(output,CV_32FC3,1/255.0); + else + tmpInput.convertTo(output,CV_32FC1); + }else + {//Assuming values are at the desired [0,1] range + if (rawval_ ==1) + tmpInput.convertTo(output, CV_32FC1); + else + tmpInput.convertTo(output, CV_32FC1,rawval_); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + if (rawval_ == 1) + input.convertTo(output,CV_32FC1,1/255.0); + else + input.convertTo(output,CV_32FC1); + }else + {//Assuming values are at the desired [0,1] range + if (rawval_ ==1) + input.convertTo(output, CV_32FC1); + else + input.convertTo(output, CV_32FC1,rawval_); + } + }else + { + if(input.depth()==CV_8U) + { + if (rawval_ == 1) + input.convertTo(output,CV_32FC3,1/255.0); + else + input.convertTo(output,CV_32FC3); + }else + {//Assuming values are at the desired [0,1] range + if (rawval_ ==1) + input.convertTo(output, CV_32FC3); + else + input.convertTo(output, CV_32FC3,rawval_); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + + if (!this->mean_.empty()){ + + Scalar mean_s(this->mean_.at(0,0),this->mean_.at(0,1),this->mean_.at(0,2)); + subtract(output,mean_s,output); + } + else{ + Scalar mean_s; + mean_s = mean(output); + subtract(output,mean_s,output); + } + + } + +public: + customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){} + ~customPreprocessor(){} + +}; + +class MeanSubtractorPreprocessor: public ImagePreprocessor{ +protected: + Mat mean_; + //void set_mean_(Mat m){} + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of channel and depth conversions in ImageProcessor class + CV_Assert(this->mean_.cols==outputSize.width && 
this->mean_.rows ==outputSize.height); + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1) + { + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC1); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC3); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC1); + } + }else + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC3); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + subtract(output,this->mean_,output); + } +public: + MeanSubtractorPreprocessor(Mat mean) + { + mean.copyTo(this->mean_); + } + + ~MeanSubtractorPreprocessor(){} +}; + + + + + +Ptr ImagePreprocessor::createResizer() +{ + return Ptr(new ResizerPreprocessor); +} + +Ptr ImagePreprocessor::createImageStandarizer(double sigma) +{ + return Ptr(new StandarizerPreprocessor(sigma)); +} +Ptr ImagePreprocessor::createImageCustomPreprocessor(double rawval,String channel_order) +{ + + return Ptr(new customPreprocessor(rawval,channel_order)); +} + +Ptr ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg) +{ + Mat tmp=meanImg.getMat(); + return Ptr(new MeanSubtractorPreprocessor(tmp)); +} + +//************************************************************************************ 
+//****************** TextImageClassifier ***************************************** +//************************************************************************************ + +void TextImageClassifier::preprocess(const Mat& input,Mat& output) +{ + this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_); +} + +void TextImageClassifier::setPreprocessor(Ptr ptr) +{ + CV_Assert(!ptr.empty()); + preprocessor_=ptr; +} + +Ptr TextImageClassifier::getPreprocessor() +{ + return preprocessor_; +} + + +class DeepCNNCaffeImpl: public DeepCNN{ +protected: + void classifyMiniBatch(std::vector inputImageList, Mat outputMat) + { + //Classifies a list of images containing at most minibatchSz_ images + CV_Assert(int(inputImageList.size())<=this->minibatchSz_); + CV_Assert(outputMat.isContinuous()); + +#ifdef HAVE_CAFFE + net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); + net_->Reshape(); + float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); + float* inputData=inputBuffer; + + for(size_t imgNum=0;imgNum input_channels; + Mat preprocessed; + // if the image have multiple color channels the input layer should be populated accordingly + for (int channel=0;channel < this->channelCount_;channel++){ + + cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); + input_channels.push_back(netInputWraped); + //input_data += width * height; + inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); + } + this->preprocess(inputImageList[imgNum],preprocessed); + split(preprocessed, input_channels); + + } + this->net_->ForwardPrefilled(); + const float* outputNetData=net_->output_blobs()[0]->cpu_data(); + this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); + int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; + + 
//outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width); + float*outputMatData=(float*)(outputMat.data); + memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size()); + +#endif + } + + void process_(Mat inputImage, Mat &outputMat) + { + // do forward pass and stores the output in outputMat + //Process one image + CV_Assert(this->minibatchSz_==1); + //CV_Assert(outputMat.isContinuous()); + +#ifdef HAVE_CAFFE + net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); + net_->Reshape(); + float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); + float* inputData=inputBuffer; + + std::vector input_channels; + Mat preprocessed; + // if the image have multiple color channels the input layer should be populated accordingly + for (int channel=0;channel < this->channelCount_;channel++){ + + cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); + input_channels.push_back(netInputWraped); + //input_data += width * height; + inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); + } + this->preprocess(inputImage,preprocessed); + split(preprocessed, input_channels); + + //preprocessed.copyTo(netInputWraped); + + + this->net_->Forward(); + const float* outputNetData=net_->output_blobs()[0]->cpu_data(); + // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); + + + + + this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); + int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; + outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); + float*outputMatData=(float*)(outputMat.data); + + memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); + + + +#endif + } + + + +#ifdef HAVE_CAFFE + Ptr > net_; +#endif + //Size inputGeometry_; + int minibatchSz_;//The existence of the assignment 
operator mandates this to be nonconst + int outputSize_; +public: + DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn): + minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ + channelCount_=dn.channelCount_; + inputGeometry_=dn.inputGeometry_; + //Implemented to supress Visual Studio warning "assignment operator could not be generated" +#ifdef HAVE_CAFFE + this->net_=dn.net_; +#endif + } + DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn) + { +#ifdef HAVE_CAFFE + this->net_=dn.net_; +#endif + this->setPreprocessor(dn.preprocessor_); + this->inputGeometry_=dn.inputGeometry_; + this->channelCount_=dn.channelCount_; + this->minibatchSz_=dn.minibatchSz_; + this->outputSize_=dn.outputSize_; + this->preprocessor_=dn.preprocessor_; + this->outputGeometry_=dn.outputGeometry_; + return *this; + //Implemented to supress Visual Studio warning "assignment operator could not be generated" + } + + DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) + :minibatchSz_(maxMinibatchSz) + { + + CV_Assert(this->minibatchSz_>0); + CV_Assert(fileExists(modelArchFilename)); + CV_Assert(fileExists(modelWeightsFilename)); + CV_Assert(!preprocessor.empty()); + this->setPreprocessor(preprocessor); +#ifdef HAVE_CAFFE + this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); + CV_Assert(net_->num_inputs()==1); + CV_Assert(net_->num_outputs()==1); + CV_Assert(this->net_->input_blobs()[0]->channels()==1 + ||this->net_->input_blobs()[0]->channels()==3); + this->channelCount_=this->net_->input_blobs()[0]->channels(); + + + + this->net_->CopyTrainedLayersFrom(modelWeightsFilename); + + caffe::Blob* inputLayer = this->net_->input_blobs()[0]; + + this->inputGeometry_=Size(inputLayer->width(), inputLayer->height()); + this->channelCount_ = inputLayer->channels(); + + inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); + net_->Reshape(); + 
this->outputSize_=net_->output_blobs()[0]->channels(); + this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); + + + + + +#else + CV_Error(Error::StsError,"Caffe not available during compilation!"); +#endif + } + + void classify(InputArray image, OutputArray classProbabilities) + { + std::vector inputImageList; + inputImageList.push_back(image.getMat()); + classifyBatch(inputImageList,classProbabilities); + } + void detect(InputArray image, OutputArray Bbox_prob) + { + + Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed + Mat outputMat = Bbox_prob.getMat(); + process_(image.getMat(),outputMat); + //copy back to outputArray + outputMat.copyTo(Bbox_prob); + } + + void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) + { + std::vector allImageVector; + inputImageList.getMatVector(allImageVector); + size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic + + size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic + classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); + Mat outputMat = classProbabilities.getMat(); + for(size_t imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); + std::vector::const_iterator from=std::vector::const_iterator(allImageVector.begin()+imgNum); + std::vector::const_iterator to=std::vector::const_iterator(allImageVector.begin()+rangeEnd); + std::vector minibatchInput(from,to); + classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); + + } + + } + + int getOutputSize() + { + return this->outputSize_; + } + Size getOutputGeometry() + { + return this->outputGeometry_; + } + + int getMinibatchSize() + { + return this->minibatchSz_; + } + + int getBackend() + { + return OCR_HOLISTIC_BACKEND_CAFFE; + } +}; + + +Ptr DeepCNN::create(String archFilename,String weightsFilename,Ptr 
preprocessor,int minibatchSz,int backEnd) +{ + if(preprocessor.empty()) + { + preprocessor=ImagePreprocessor::createResizer(); + } + switch(backEnd){ + case OCR_HOLISTIC_BACKEND_CAFFE: + return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + break; + case OCR_HOLISTIC_BACKEND_NONE: + default: + CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + return Ptr(); + break; + } +} + + +Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd) +{ + Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); + switch(backEnd){ + case OCR_HOLISTIC_BACKEND_CAFFE: + return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); + break; + case OCR_HOLISTIC_BACKEND_NONE: + default: + CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + return Ptr(); + break; + } +} + +namespace cnn_config{ +namespace caffe_backend{ + +#ifdef HAVE_CAFFE + +bool getCaffeGpuMode() +{ + return caffe::Caffe::mode()==caffe::Caffe::GPU; +} + +void setCaffeGpuMode(bool useGpu) +{ + if(useGpu) + { + caffe::Caffe::set_mode(caffe::Caffe::GPU); + }else + { + caffe::Caffe::set_mode(caffe::Caffe::CPU); + } +} + +bool getCaffeAvailable() +{ + return true; +} + +#else + +bool getCaffeGpuMode() +{ + CV_Error(Error::StsError,"Caffe not available during compilation!"); + return 0; +} + +void setCaffeGpuMode(bool useGpu) +{ + CV_Error(Error::StsError,"Caffe not available during compilation!"); + CV_Assert(useGpu==1);//Compilation directives force +} + +bool getCaffeAvailable(){ + return 0; +} + +#endif + +}//namespace caffe +}//namespace cnn_config + +class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ +private: + struct NetOutput{ + //Auxiliary structure that handles the logic of getting class ids and probabillities from + //the raw outputs of caffe + int wordIdx; + float probabillity; + + static bool sorter(const NetOutput& o1,const NetOutput& o2) + {//used with 
std::sort to provide the most probable class + return o1.probabillity>o2.probabillity; + } + + static void getOutputs(const float* buffer,int nbOutputs,std::vector& res) + { + res.resize(nbOutputs); + for(int k=0;k tmp; + getOutputs(buffer,nbOutputs,tmp); + classNum=tmp[0].wordIdx; + confidence=tmp[0].probabillity; + } + }; +protected: + std::vector labels_; + Ptr classifier_; +public: + OCRHolisticWordRecognizerImpl(Ptr classifierPtr,String vocabularyFilename):classifier_(classifierPtr) + { + CV_Assert(fileExists(vocabularyFilename));//this fails for some rason + std::ifstream labelsFile(vocabularyFilename.c_str()); + if(!labelsFile) + { + CV_Error(Error::StsError,"Could not read Labels from file"); + } + std::string line; + while (std::getline(labelsFile, line)) + { + labels_.push_back(std::string(line)); + } + CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); + } + + OCRHolisticWordRecognizerImpl(Ptr classifierPtr,const std::vector& vocabulary):classifier_(classifierPtr) + { + this->labels_=vocabulary; + CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); + } + + void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence) + { + Mat netOutput; + this->classifier_->classify(inputImage,netOutput); + int classNum; + NetOutput::getClassification((float*)(netOutput.data),this->classifier_->getOutputSize(),classNum,confidence); + transcription=this->labels_[classNum]; + } + + void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector& transcriptionVec,CV_OUT std::vector& confidenceVec) + { + Mat netOutput; + this->classifier_->classifyBatch(inputImageList,netOutput); + for(int k=0;kclassifier_->getOutputSize(),classNum,confidence); + transcriptionVec.push_back(this->labels_[classNum]); + confidenceVec.push_back(confidence); + } + } + + + void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* 
component_confidences=NULL, + int component_level=0) + { + CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting + double confidence; + String transcription; + recogniseImage(image,transcription,confidence); + output_text=transcription.c_str(); + if(component_rects!=NULL) + { + component_rects->resize(1); + (*component_rects)[0]=Rect(0,0,image.size().width,image.size().height); + } + if(component_texts!=NULL) + { + component_texts->resize(1); + (*component_texts)[0]=transcription.c_str(); + } + if(component_confidences!=NULL) + { + component_confidences->resize(1); + (*component_confidences)[0]=float(confidence); + } + } + + void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + int component_level=0) + { + CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image + this->run(image,output_text,component_rects,component_texts,component_confidences,component_level); + } + + std::vector& getVocabulary() + { + return this->labels_; + } + + Ptr getClassifier() + { + return this->classifier_; + } +}; + +Ptr OCRHolisticWordRecognizer::create(Ptr classifierPtr,String vocabularyFilename ) +{ + return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename)); +} + +Ptr OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename) +{ + Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); + Ptr classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100)); + return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename)); +} + +Ptr OCRHolisticWordRecognizer::create(Ptr classifierPtr,const std::vector& vocabulary) +{ + return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary)); +} + +Ptr OCRHolisticWordRecognizer::create(String 
modelArchFilename, String modelWeightsFilename,const std::vector& vocabulary){ + Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); + Ptr classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100)); + return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary)); +} + + + + + +} } //namespace text namespace cv diff --git a/modules/text/src/text_detector.cpp b/modules/text/src/text_detector.cpp new file mode 100644 index 00000000000..8f224a70f14 --- /dev/null +++ b/modules/text/src/text_detector.cpp @@ -0,0 +1,643 @@ +#include "precomp.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/core.hpp" + + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef HAVE_CAFFE +#include "caffe/caffe.hpp" +#endif + +namespace cv { namespace text { + +//Maybe OpenCV has a routine better suited +//inline bool fileExists (String filename) { +// std::ifstream f(filename.c_str()); +// return f.good(); +//} + +//************************************************************************************ +//****************** ImagePreprocessor ******************************************* +//************************************************************************************ + +/*void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){ + Mat inpImg=input.getMat(); + Mat outImg; + this->preprocess_(inpImg,outImg,sz,outputChannels); + outImg.copyTo(output); +}*/ + + +/*class ResizerPreprocessor: public ImagePreprocessor{ +protected: + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of channel and depth conversions in ImageProcessor class + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1){ + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + 
if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC1); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC3); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC1); + } + }else + { + if(input.depth()==CV_8U){ + input.convertTo(output, CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC3); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + } +public: + ResizerPreprocessor(){} + ~ResizerPreprocessor(){} +}; + +class StandarizerPreprocessor: public ImagePreprocessor{ +protected: + double sigma_; + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of channel and depth conversions in ImageProcessor class + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1) + { + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC1); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC3); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + 
input.convertTo(output, CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC1); + } + }else + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC3); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + Scalar dev,mean; + meanStdDev(output,mean,dev); + subtract(output,mean[0],output); + divide(output,(dev[0]/sigma_),output); + } +public: + StandarizerPreprocessor(double sigma):sigma_(sigma){} + ~StandarizerPreprocessor(){} +}; + +class MeanSubtractorPreprocessor: public ImagePreprocessor{ +protected: + Mat mean_; + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of channel and depth conversions in ImageProcessor class + CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height); + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1) + { + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC1); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC3); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC1); + } + }else + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC3,1/255.0); + }else + 
{//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC3); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + subtract(output,this->mean_,output); + } +public: + MeanSubtractorPreprocessor(Mat mean) + { + mean.copyTo(this->mean_); + } + + ~MeanSubtractorPreprocessor(){} +}; + + +Ptr ImagePreprocessor::createResizer() +{ + return Ptr(new ResizerPreprocessor); +} + +Ptr ImagePreprocessor::createImageStandarizer(double sigma) +{ + return Ptr(new StandarizerPreprocessor(sigma)); +} + +Ptr ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg) +{ + Mat tmp=meanImg.getMat(); + return Ptr(new MeanSubtractorPreprocessor(tmp)); +} + +//************************************************************************************ +//****************** TextImageClassifier ***************************************** +//************************************************************************************ + +void TextImageClassifier::preprocess(const Mat& input,Mat& output) +{ + this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_); +} + +void TextImageClassifier::setPreprocessor(Ptr ptr) +{ + CV_Assert(!ptr.empty()); + preprocessor_=ptr; +} + +Ptr TextImageClassifier::getPreprocessor() +{ + return preprocessor_; +}*/ + +/* +class DeepCNNCaffeImpl: public DeepCNN{ +protected: + void classifyMiniBatch(std::vector inputImageList, Mat outputMat) + { + //Classifies a list of images containing at most minibatchSz_ images + CV_Assert(int(inputImageList.size())<=this->minibatchSz_); + CV_Assert(outputMat.isContinuous()); +#ifdef HAVE_CAFFE + net_->input_blobs()[0]->Reshape(inputImageList.size(), 1,this->inputGeometry_.height,this->inputGeometry_.width); + net_->Reshape(); + float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); + float* inputData=inputBuffer; + for(size_t imgNum=0;imgNuminputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); + 
this->preprocess(inputImageList[imgNum],preprocessed); + preprocessed.copyTo(netInputWraped); + inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); + } + this->net_->ForwardPrefilled(); + const float* outputNetData=net_->output_blobs()[0]->cpu_data(); + float*outputMatData=(float*)(outputMat.data); + memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size()); +#endif + } + +#ifdef HAVE_CAFFE + Ptr > net_; +#endif + //Size inputGeometry_; + int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst + int outputSize_; +public: + DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn): + minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ + channelCount_=dn.channelCount_; + inputGeometry_=dn.inputGeometry_; + //Implemented to supress Visual Studio warning "assignment operator could not be generated" +#ifdef HAVE_CAFFE + this->net_=dn.net_; +#endif + } + DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn) + { +#ifdef HAVE_CAFFE + this->net_=dn.net_; +#endif + this->setPreprocessor(dn.preprocessor_); + this->inputGeometry_=dn.inputGeometry_; + this->channelCount_=dn.channelCount_; + this->minibatchSz_=dn.minibatchSz_; + this->outputSize_=dn.outputSize_; + this->preprocessor_=dn.preprocessor_; + return *this; + //Implemented to supress Visual Studio warning "assignment operator could not be generated" + } + + DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) + :minibatchSz_(maxMinibatchSz) + { + CV_Assert(this->minibatchSz_>0); + CV_Assert(fileExists(modelArchFilename)); + CV_Assert(fileExists(modelWeightsFilename)); + CV_Assert(!preprocessor.empty()); + this->setPreprocessor(preprocessor); +#ifdef HAVE_CAFFE + this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); + CV_Assert(net_->num_inputs()==1); + CV_Assert(net_->num_outputs()==1); + CV_Assert(this->net_->input_blobs()[0]->channels()==1 + 
||this->net_->input_blobs()[0]->channels()==3); + this->channelCount_=this->net_->input_blobs()[0]->channels(); + this->net_->CopyTrainedLayersFrom(modelWeightsFilename); + caffe::Blob* inputLayer = this->net_->input_blobs()[0]; + this->inputGeometry_=Size(inputLayer->width(), inputLayer->height()); + inputLayer->Reshape(this->minibatchSz_,1,this->inputGeometry_.height, this->inputGeometry_.width); + net_->Reshape(); + this->outputSize_=net_->output_blobs()[0]->channels(); + +#else + CV_Error(Error::StsError,"Caffe not available during compilation!"); +#endif + } + + void classify(InputArray image, OutputArray classProbabilities) + { + std::vector inputImageList; + inputImageList.push_back(image.getMat()); + classifyBatch(inputImageList,classProbabilities); + } + + void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) + { + std::vector allImageVector; + inputImageList.getMatVector(allImageVector); + size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic + size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic + classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); + Mat outputMat = classProbabilities.getMat(); + for(size_t imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); + std::vector::const_iterator from=std::vector::const_iterator(allImageVector.begin()+imgNum); + std::vector::const_iterator to=std::vector::const_iterator(allImageVector.begin()+rangeEnd); + std::vector minibatchInput(from,to); + classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); + } + } + + int getOutputSize() + { + return this->outputSize_; + } + + int getMinibatchSize() + { + return this->minibatchSz_; + } + + int getBackend() + { + return OCR_HOLISTIC_BACKEND_CAFFE; + } +}; + + +Ptr DeepCNN::create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz,int backEnd) +{ + 
if(preprocessor.empty()) + { + preprocessor=ImagePreprocessor::createResizer(); + } + switch(backEnd){ + case OCR_HOLISTIC_BACKEND_CAFFE: + return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + break; + case OCR_HOLISTIC_BACKEND_NONE: + default: + CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + return Ptr(); + break; + } +} + + +Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd) +{ + Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); + switch(backEnd){ + case OCR_HOLISTIC_BACKEND_CAFFE: + return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); + break; + case OCR_HOLISTIC_BACKEND_NONE: + default: + CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + return Ptr(); + break; + } +} + +namespace cnn_config{ +namespace caffe_backend{ + +#ifdef HAVE_CAFFE + +bool getCaffeGpuMode() +{ + return caffe::Caffe::mode()==caffe::Caffe::GPU; +} + +void setCaffeGpuMode(bool useGpu) +{ + if(useGpu) + { + caffe::Caffe::set_mode(caffe::Caffe::GPU); + }else + { + caffe::Caffe::set_mode(caffe::Caffe::CPU); + } +} + +bool getCaffeAvailable() +{ + return true; +} + +#else + +bool getCaffeGpuMode() +{ + CV_Error(Error::StsError,"Caffe not available during compilation!"); + return 0; +} + +void setCaffeGpuMode(bool useGpu) +{ + CV_Error(Error::StsError,"Caffe not available during compilation!"); + CV_Assert(useGpu==1);//Compilation directives force +} + +bool getCaffeAvailable(){ + return 0; +} + +#endif + +}//namespace caffe +}//namespace cnn_config +*/ + +class textDetectImpl: public textDetector{ +private: + struct NetOutput{ + //Auxiliary structure that handles the logic of getting bounding box and confidences of textness from + //the raw outputs of caffe + Rect bbox; + float probability; + +// static bool sorter(const NetOutput& o1,const NetOutput& o2) +// {//used with std::sort to provide the most probable class +// return 
o1.probabillity>o2.probabillity; +// } + + static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector& res,Size inputShape) + { + + res.resize(nbrTextBoxes); + for(int k=0;k inputShape.width?inputShape.width-1:x_max; + y_max = y_max > inputShape.height?inputShape.height-1:y_max; + float wd = x_max-x_min+1; + float ht = y_max-y_min+1; + + res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht)); + // printf("%f %f %f %f\n",buffer[k*nCol+3],buffer[k*nCol+4],buffer[k*nCol+5],buffer[k*nCol+6]); + res[k].probability=buffer[k*nCol+2]; + } +// std::sort(res.begin(),res.end(),NetOutput::sorter); + } + +// static void getDetections(const float* buffer,int nbOutputs,int &classNum,double& confidence) +// { +// std::vector tmp; +// getOutputs(buffer,nbOutputs,tmp); +// classNum=tmp[0].wordIdx; +// confidence=tmp[0].probabillity; +// } + }; +protected: + //std::vector labels_; + Ptr classifier_; +public: + textDetectImpl(Ptr classifierPtr):classifier_(classifierPtr) + { + + } + + + + void textDetectInImage(InputArray inputImage,CV_OUT std::vector& Bbox,CV_OUT std::vector& confidence) + { + Mat netOutput; + //std::cout<<"started detect"<classifier_->detect(inputImage,netOutput); + //std::cout<<"After Detect"<classifier_->getOutputGeometry(); + int nbrTextBoxes = OutputGeometry_.height; + int nCol = OutputGeometry_.width; + //std::cout< tmp; + Size inputImageShape = Size(inputImage.cols(),inputImage.rows()); + NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape); + //Bbox.resize(nbrTextBoxes); + //confidence.resize(nbrTextBoxes); + for (int k=0;k* component_rects=NULL, + std::vector* component_confidences=NULL, + int component_level=0) + { + CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting + //double confidence; + //String transcription; + std::vector bbox; + std::vector score; + textDetectInImage(image,bbox,score); + //output_text=transcription.c_str(); + if(component_rects!=NULL) + { 
+ component_rects->resize(bbox.size()); // should be a user behavior + + component_rects = &bbox; + } + + if(component_confidences!=NULL) + { + component_confidences->resize(score.size()); // shoub be a user behavior + + component_confidences = &score; + } + } + + void run(Mat& image, Mat& mask, std::vector* component_rects=NULL, + std::vector* component_confidences=NULL, + int component_level=0) + { + CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image + this->run(image,component_rects,component_confidences,component_level); + } + +// std::vector& getVocabulary() +// { +// return this->labels_; +// } + + Ptr getClassifier() + { + return this->classifier_; + } +}; + +Ptr textDetector::create(Ptr classifierPtr) +{ + return Ptr(new textDetectImpl(classifierPtr)); +} + +Ptr textDetector::create(String modelArchFilename, String modelWeightsFilename) +{ + + + Ptr preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); + + Mat textbox_mean(1,3,CV_8U); + textbox_mean.at(0,0)=104; + textbox_mean.at(0,1)=117; + textbox_mean.at(0,2)=123; + preprocessor->set_mean(textbox_mean); + + Ptr classifierPtr(DeepCNN::create(modelArchFilename,modelWeightsFilename,preprocessor,1)); + return Ptr(new textDetectImpl(classifierPtr)); +} + + + + + + + +} } //namespace text namespace cv diff --git a/modules/text/text_config.hpp.in b/modules/text/text_config.hpp.in index 30089bd3c55..71b32993acf 100644 --- a/modules/text/text_config.hpp.in +++ b/modules/text/text_config.hpp.in @@ -1,7 +1,13 @@ #ifndef __OPENCV_TEXT_CONFIG_HPP__ #define __OPENCV_TEXT_CONFIG_HPP__ +// HAVE QT5 +//#cmakedefine HAVE_QT5GUI + +// HAVE CAFFE +//#cmakedefine HAVE_CAFFE + // HAVE OCR Tesseract -#cmakedefine HAVE_TESSERACT +//#cmakedefine HAVE_TESSERACT -#endif \ No newline at end of file +#endif From 40db962641ded7f125a0baecddf193968cf6656c Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Thu, 22 Jun 2017 19:19:43 +0200 Subject: [PATCH 02/31] 
Add sample script --- modules/text/samples/textbox_demo.cpp | 146 ++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 modules/text/samples/textbox_demo.cpp diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp new file mode 100644 index 00000000000..a4155893543 --- /dev/null +++ b/modules/text/samples/textbox_demo.cpp @@ -0,0 +1,146 @@ +/* + * dictnet_demo.cpp + * + * Demonstrates simple use of the holistic word classifier in C++ + * + * Created on: June 26, 2016 + * Author: Anguelos Nicolaou + */ + +#include "opencv2/text.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc.hpp" + +#include +#include +#include +#include +#include + +inline std::string getHelpStr(std::string progFname){ + std::stringstream out; + out << " Demo of text detection CNN for text detection." << std::endl; + out << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"< " << std::endl; + out << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"< &groups,std::vector &probs,std::vector wordList,float thres=0.6) +{ + for (int i=0;i<(int)groups.size(); i++) + { + if(probs[i]>thres) + { + if (src.type() == CV_8UC3) + { + cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 ); + cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 )); + } + else + rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 ); + } + } +} + + +int main(int argc, const char * argv[]){ + if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){ + std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"; + exit(1); + } + //set to true if you have a GPU with more than 3GB + cv::text::cnn_config::caffe_backend::setCaffeGpuMode(false); + + if (argc < 3){ + std::cout< textSpotter=cv::text::textDetector::create( + 
"textbox_deploy.prototxt","textbox.caffemodel"); + + //cv::Ptr wordSpotter= + // cv::text::textDetector::create(cnn); + std::cout<<"Created Text Spotter with text Boxes"; + + std::vector bbox; + std::vector outProbabillities; + textSpotter->textDetectInImage(image,bbox,outProbabillities); + // textbox_draw(image, bbox,outProbabillities); + float thres =0.6; + std::vector imageList; + for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){ + if(outProbabillities[imageIdx]>thres){ + imageList.push_back(image(bbox.at(imageIdx))); + } + + } + // call dict net here for all detected parts + cv::Ptr cnn=cv::text::DeepCNN::createDictNet( + "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel"); + + cv::Ptr wordSpotter= + cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt"); + + std::vector wordList; + std::vector wordProbabillities; + wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities); + // write the output in file + std::ofstream out; + out.open(argv[1]); + + + for (int i=0;i<(int)wordList.size(); i++) + { + cv::Point tl_ = bbox.at(i).tl(); + cv::Point br_ = bbox.at(i).br(); + + out< Date: Fri, 23 Jun 2017 18:36:33 +0200 Subject: [PATCH 03/31] Minor modification --- modules/text/include/opencv2/text/ocr.hpp | 2 + modules/text/src/text_detector.cpp | 503 +--------------------- 2 files changed, 16 insertions(+), 489 deletions(-) diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 9fc5403fdef..e0afe5ca4d6 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -722,6 +722,8 @@ class CV_EXPORTS_W TextImageClassifier /** @brief simple getter method returning the size of the oputput row-vector */ CV_WRAP virtual int getOutputSize()=0; + /** @brief simple getter method returning the shape of the oputput from caffe + */ CV_WRAP virtual Size getOutputGeometry()=0; /** @brief simple getter method returning the size of the minibatches 
for this classifier. diff --git a/modules/text/src/text_detector.cpp b/modules/text/src/text_detector.cpp index 8f224a70f14..5b18e970861 100644 --- a/modules/text/src/text_detector.cpp +++ b/modules/text/src/text_detector.cpp @@ -22,468 +22,6 @@ namespace cv { namespace text { -//Maybe OpenCV has a routine better suited -//inline bool fileExists (String filename) { -// std::ifstream f(filename.c_str()); -// return f.good(); -//} - -//************************************************************************************ -//****************** ImagePreprocessor ******************************************* -//************************************************************************************ - -/*void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){ - Mat inpImg=input.getMat(); - Mat outImg; - this->preprocess_(inpImg,outImg,sz,outputChannels); - outImg.copyTo(output); -}*/ - - -/*class ResizerPreprocessor: public ImagePreprocessor{ -protected: - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1){ - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] 
range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U){ - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - } -public: - ResizerPreprocessor(){} - ~ResizerPreprocessor(){} -}; - -class StandarizerPreprocessor: public ImagePreprocessor{ -protected: - double sigma_; - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - Scalar dev,mean; - meanStdDev(output,mean,dev); - subtract(output,mean[0],output); - divide(output,(dev[0]/sigma_),output); - } -public: - 
StandarizerPreprocessor(double sigma):sigma_(sigma){} - ~StandarizerPreprocessor(){} -}; - -class MeanSubtractorPreprocessor: public ImagePreprocessor{ -protected: - Mat mean_; - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height); - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - subtract(output,this->mean_,output); - } -public: - MeanSubtractorPreprocessor(Mat mean) - { - mean.copyTo(this->mean_); - } - - ~MeanSubtractorPreprocessor(){} -}; - - -Ptr ImagePreprocessor::createResizer() -{ - return Ptr(new ResizerPreprocessor); -} - -Ptr ImagePreprocessor::createImageStandarizer(double sigma) -{ - return Ptr(new StandarizerPreprocessor(sigma)); -} - -Ptr 
ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg) -{ - Mat tmp=meanImg.getMat(); - return Ptr(new MeanSubtractorPreprocessor(tmp)); -} - -//************************************************************************************ -//****************** TextImageClassifier ***************************************** -//************************************************************************************ - -void TextImageClassifier::preprocess(const Mat& input,Mat& output) -{ - this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_); -} - -void TextImageClassifier::setPreprocessor(Ptr ptr) -{ - CV_Assert(!ptr.empty()); - preprocessor_=ptr; -} - -Ptr TextImageClassifier::getPreprocessor() -{ - return preprocessor_; -}*/ - -/* -class DeepCNNCaffeImpl: public DeepCNN{ -protected: - void classifyMiniBatch(std::vector inputImageList, Mat outputMat) - { - //Classifies a list of images containing at most minibatchSz_ images - CV_Assert(int(inputImageList.size())<=this->minibatchSz_); - CV_Assert(outputMat.isContinuous()); -#ifdef HAVE_CAFFE - net_->input_blobs()[0]->Reshape(inputImageList.size(), 1,this->inputGeometry_.height,this->inputGeometry_.width); - net_->Reshape(); - float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); - float* inputData=inputBuffer; - for(size_t imgNum=0;imgNuminputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); - this->preprocess(inputImageList[imgNum],preprocessed); - preprocessed.copyTo(netInputWraped); - inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); - } - this->net_->ForwardPrefilled(); - const float* outputNetData=net_->output_blobs()[0]->cpu_data(); - float*outputMatData=(float*)(outputMat.data); - memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size()); -#endif - } - -#ifdef HAVE_CAFFE - Ptr > net_; -#endif - //Size inputGeometry_; - int minibatchSz_;//The existence of the assignment operator mandates this to be 
nonconst - int outputSize_; -public: - DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn): - minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ - channelCount_=dn.channelCount_; - inputGeometry_=dn.inputGeometry_; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" -#ifdef HAVE_CAFFE - this->net_=dn.net_; -#endif - } - DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn) - { -#ifdef HAVE_CAFFE - this->net_=dn.net_; -#endif - this->setPreprocessor(dn.preprocessor_); - this->inputGeometry_=dn.inputGeometry_; - this->channelCount_=dn.channelCount_; - this->minibatchSz_=dn.minibatchSz_; - this->outputSize_=dn.outputSize_; - this->preprocessor_=dn.preprocessor_; - return *this; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" - } - - DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) - :minibatchSz_(maxMinibatchSz) - { - CV_Assert(this->minibatchSz_>0); - CV_Assert(fileExists(modelArchFilename)); - CV_Assert(fileExists(modelWeightsFilename)); - CV_Assert(!preprocessor.empty()); - this->setPreprocessor(preprocessor); -#ifdef HAVE_CAFFE - this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); - CV_Assert(net_->num_inputs()==1); - CV_Assert(net_->num_outputs()==1); - CV_Assert(this->net_->input_blobs()[0]->channels()==1 - ||this->net_->input_blobs()[0]->channels()==3); - this->channelCount_=this->net_->input_blobs()[0]->channels(); - this->net_->CopyTrainedLayersFrom(modelWeightsFilename); - caffe::Blob* inputLayer = this->net_->input_blobs()[0]; - this->inputGeometry_=Size(inputLayer->width(), inputLayer->height()); - inputLayer->Reshape(this->minibatchSz_,1,this->inputGeometry_.height, this->inputGeometry_.width); - net_->Reshape(); - this->outputSize_=net_->output_blobs()[0]->channels(); - -#else - CV_Error(Error::StsError,"Caffe not available during compilation!"); -#endif - } - - void classify(InputArray 
image, OutputArray classProbabilities) - { - std::vector inputImageList; - inputImageList.push_back(image.getMat()); - classifyBatch(inputImageList,classProbabilities); - } - - void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) - { - std::vector allImageVector; - inputImageList.getMatVector(allImageVector); - size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic - size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic - classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); - Mat outputMat = classProbabilities.getMat(); - for(size_t imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); - std::vector::const_iterator from=std::vector::const_iterator(allImageVector.begin()+imgNum); - std::vector::const_iterator to=std::vector::const_iterator(allImageVector.begin()+rangeEnd); - std::vector minibatchInput(from,to); - classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); - } - } - - int getOutputSize() - { - return this->outputSize_; - } - - int getMinibatchSize() - { - return this->minibatchSz_; - } - - int getBackend() - { - return OCR_HOLISTIC_BACKEND_CAFFE; - } -}; - - -Ptr DeepCNN::create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz,int backEnd) -{ - if(preprocessor.empty()) - { - preprocessor=ImagePreprocessor::createResizer(); - } - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_CAFFE: - return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); - break; - case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); - break; - } -} - - -Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd) -{ - Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_CAFFE: - 
return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); - break; - case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); - break; - } -} - -namespace cnn_config{ -namespace caffe_backend{ - -#ifdef HAVE_CAFFE - -bool getCaffeGpuMode() -{ - return caffe::Caffe::mode()==caffe::Caffe::GPU; -} - -void setCaffeGpuMode(bool useGpu) -{ - if(useGpu) - { - caffe::Caffe::set_mode(caffe::Caffe::GPU); - }else - { - caffe::Caffe::set_mode(caffe::Caffe::CPU); - } -} - -bool getCaffeAvailable() -{ - return true; -} - -#else - -bool getCaffeGpuMode() -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - return 0; -} - -void setCaffeGpuMode(bool useGpu) -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - CV_Assert(useGpu==1);//Compilation directives force -} - -bool getCaffeAvailable(){ - return 0; -} - -#endif - -}//namespace caffe -}//namespace cnn_config -*/ class textDetectImpl: public textDetector{ private: @@ -493,10 +31,6 @@ class textDetectImpl: public textDetector{ Rect bbox; float probability; -// static bool sorter(const NetOutput& o1,const NetOutput& o2) -// {//used with std::sort to provide the most probable class -// return o1.probabillity>o2.probabillity; -// } static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector& res,Size inputShape) { @@ -516,22 +50,16 @@ class textDetectImpl: public textDetector{ float ht = y_max-y_min+1; res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht)); - // printf("%f %f %f %f\n",buffer[k*nCol+3],buffer[k*nCol+4],buffer[k*nCol+5],buffer[k*nCol+6]); + res[k].probability=buffer[k*nCol+2]; } -// std::sort(res.begin(),res.end(),NetOutput::sorter); + } -// static void getDetections(const float* buffer,int nbOutputs,int &classNum,double& confidence) -// { -// std::vector tmp; -// getOutputs(buffer,nbOutputs,tmp); -// classNum=tmp[0].wordIdx; -// 
confidence=tmp[0].probabillity; -// } + }; protected: - //std::vector labels_; + Ptr classifier_; public: textDetectImpl(Ptr classifierPtr):classifier_(classifierPtr) @@ -544,25 +72,24 @@ class textDetectImpl: public textDetector{ void textDetectInImage(InputArray inputImage,CV_OUT std::vector& Bbox,CV_OUT std::vector& confidence) { Mat netOutput; - //std::cout<<"started detect"<classifier_->detect(inputImage,netOutput); - //std::cout<<"After Detect"<classifier_->getOutputGeometry(); int nbrTextBoxes = OutputGeometry_.height; int nCol = OutputGeometry_.width; - //std::cout< tmp; + // the output bounding box needs to be resized by the input height and width Size inputImageShape = Size(inputImage.cols(),inputImage.rows()); NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape); - //Bbox.resize(nbrTextBoxes); - //confidence.resize(nbrTextBoxes); + // put the output in CV_OUT + for (int k=0;krun(image,component_rects,component_confidences,component_level); } -// std::vector& getVocabulary() -// { -// return this->labels_; -// } + Ptr getClassifier() { @@ -621,15 +145,16 @@ Ptr textDetector::create(Ptr classifierPtr) Ptr textDetector::create(String modelArchFilename, String modelWeightsFilename) { - +// create a custom preprocessor with rawval Ptr preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); +// set the mean for the preprocessor Mat textbox_mean(1,3,CV_8U); textbox_mean.at(0,0)=104; textbox_mean.at(0,1)=117; textbox_mean.at(0,2)=123; preprocessor->set_mean(textbox_mean); - +// create a pointer to text box detector(textDetector) Ptr classifierPtr(DeepCNN::create(modelArchFilename,modelWeightsFilename,preprocessor,1)); return Ptr(new textDetectImpl(classifierPtr)); } From e494efb4b0884c0b68a8de7d7684ee385d8e222e Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Fri, 23 Jun 2017 19:09:17 +0200 Subject: [PATCH 04/31] Added comments --- modules/text/include/opencv2/text/ocr.hpp | 14 +++ 
.../include/opencv2/text/textDetector.hpp | 104 +++--------------- 2 files changed, 28 insertions(+), 90 deletions(-) diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index e0afe5ca4d6..9593a1415fd 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -633,6 +633,16 @@ class CV_EXPORTS_W ImagePreprocessor{ */ CV_WRAP void preprocess(InputArray input,OutputArray output,Size sz,int outputChannels); + /** @brief this method in provides public acces to set the mean of the input images + * mean can be a mat either of same size of the image or one value per color channel + * A preprocessor can be created without the mean( the pre processor will calculate mean for every image + * in that case + * + + * @param mean which will be subtracted from the images + * + */ + CV_WRAP void set_mean(Mat mean); /** @brief Creates a functor that only resizes and changes the channels of the input @@ -655,6 +665,10 @@ class CV_EXPORTS_W ImagePreprocessor{ * @return shared pointer to generated preprocessor */ CV_WRAP static Ptr createImageMeanSubtractor(InputArray meanImg); + /** @brief + * create a functor with the parameters, parameters can be changes by corresponding set functions + * @return shared pointer to generated preprocessor + */ CV_WRAP static PtrcreateImageCustomPreprocessor(double rawval=1.0,String channel_order="BGR"); diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index 262795733d9..ea1c7de9d4b 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -62,7 +62,7 @@ namespace text //base class BaseDetector declares a common API that would be used in a typical text -//recognition scenario +//detection scenario class CV_EXPORTS_W BaseDetector { public: @@ -78,46 +78,7 @@ class CV_EXPORTS_W BaseDetector std::vector* component_confidences=NULL, 
int component_level=0) = 0; - /** @brief Main functionality of the OCR Hierarchy. Subclasses provide - * default parameters for all parameters other than the input image. - */ -// virtual std::vector* run(InputArray image){ -// //std::string res; -// std::vector component_rects; -// std::vector component_confidences; -// //std::vector component_texts; -// Mat inputImage=image.getMat(); -// this->run(inputImage,&component_rects, -// &component_confidences,OCR_LEVEL_WORD); -// return *component_rects; -// } - -}; - - -//Classifiers should provide diferent backends -//For the moment only caffe is implemeted -//enum{ -// OCR_HOLISTIC_BACKEND_NONE, -// OCR_HOLISTIC_BACKEND_CAFFE -//}; - - - - - -/** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting. - * Given a predefined vocabulary , a TextImageClassifier is employed to select the most probable - * word given an input image. - * - * This class implements the logic of providing transcriptions given a vocabulary and and an image - * classifer. The classifier has to be any TextImageClassifier but the classifier for which this - * class was built is the DictNet. In order to load it the following files should be downloaded: - * - * - * - */ class CV_EXPORTS_W textDetector : public BaseDetector { public: @@ -125,7 +86,7 @@ class CV_EXPORTS_W textDetector : public BaseDetector std::vector* component_confidences=NULL, int component_level=OCR_LEVEL_WORD)=0; - /** @brief Recognize text using a segmentation based word-spotting/classifier cnn. + /** @brief detect text with a cnn, input is one image with (multiple) ocuurance of text. Takes image on input and returns recognized text in the output_text parameter. Optionally provides also the Rects for individual text elements found (e.g. 
words), and the list of those @@ -135,16 +96,12 @@ class CV_EXPORTS_W textDetector : public BaseDetector @param mask is totally ignored and is only available for compatibillity reasons - @param output_text Output text of the the word spoting, always one that exists in the dictionary. - @param component_rects Not applicable for word spotting can be be NULL if not, a single elemnt will - be put in the vector. + @param component_rects a vector of Rects, each rect is one text bounding box. - @param component_texts Not applicable for word spotting can be be NULL if not, a single elemnt will - be put in the vector. - @param component_confidences Not applicable for word spotting can be be NULL if not, a single elemnt will - be put in the vector. + + @param component_confidences A vector of float returns confidence of text bounding boxes @param component_level must be OCR_LEVEL_WORD. */ @@ -155,76 +112,43 @@ class CV_EXPORTS_W textDetector : public BaseDetector /** - @brief Method that provides a quick and simple interface to a single word image classifcation + @brief Method that provides a quick and simple interface to detect text inside an image - @param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size + @param inputImage an image expected to be a CV_U8C3 of any size - @param transcription an opencv string that will store the detected word transcription + @param Bbox a vector of Rect that will store the detected word bounding box - @param confidence a double that will be updated with the confidence the classifier has for the selected word + @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box */ CV_WRAP virtual void textDetectInImage(InputArray inputImage,CV_OUT std::vector& Bbox,CV_OUT std::vector& confidence)=0; - /** - @brief Method that provides a quick and simple interface to a multiple word image classifcation taking advantage - the classifiers parallel capabilities. 
- - @param inputImageList an list of images expected to be a CV_U8C1 or CV_U8C3 each image can be of any size and is assumed - to contain a single word. - @param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each - input image - - @param confidences a vector of double that will be updated with the confidence the classifier has for each of the - selected words. - */ - //CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector& transcriptions,CV_OUT std::vector& confidences)=0; /** @brief simple getter for the preprocessing functor */ CV_WRAP virtual Ptr getClassifier()=0; - /** @brief Creates an instance of the OCRHolisticWordRecognizer class. + /** @brief Creates an instance of the textDetector class. @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance - @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line - in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize - of the classifier. + */ CV_WRAP static Ptr create(Ptr classifierPtr); - /** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier. + /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier. @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. - @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line - in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize - of the classifier. 
+ */ CV_WRAP static Ptr create(String modelArchFilename, String modelWeightsFilename); - /** @brief - * - * @param classifierPtr - * - * @param vocabulary - */ - // CV_WRAP static Ptr create(Ptr classifierPtr,const std::vector& vocabulary); - - /** @brief - * - * @param modelArchFilename - * - * @param modelWeightsFilename - * - * @param vocabulary - */ - // CV_WRAP static Ptr create (String modelArchFilename, String modelWeightsFilename, const std::vector& vocabulary); + }; From 2b8ed124f2eacae9c4c8833382ceea30eee67447 Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Wed, 5 Jul 2017 16:34:55 +0200 Subject: [PATCH 05/31] added instructions to build --- modules/text/README.md | 70 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/modules/text/README.md b/modules/text/README.md index 3a3a897f7c3..8d0648cfe59 100644 --- a/modules/text/README.md +++ b/modules/text/README.md @@ -118,4 +118,74 @@ CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./ +``` + +Text Detection CNN +================= + +Intro +----- + +A text detection CNN is a CNN that takes an image which may contain multiple words. This outputs a list of Rects with bounding boxes and probability of text there. +Although other backends will be supported, for the moment only the Caffe backend is supported. 
+ + + + +Instalation of Caffe backend +---------------------------- +* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below +The caffe wrapping backend has the requirements caffe does. +* Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises. +The simplest solution is to build caffe without support for OpenCV. +* Only the OS supported by Caffe are supported by the backend. +The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. +Other UNIX systems including OSX should be easy to adapt. + +Sample script for building Caffe + +```bash +#!/bin/bash +SRCROOT="${HOME}/caffe_inst/" +mkdir -p "$SRCROOT" +cd "$SRCROOT" +git clone https://github.com/sghoshcvc/TextBoxes.git +cd TextBoxes +cat Makefile.config.example > Makefile.config +echo 'USE_OPENCV := 0' >> Makefile.config +echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config +echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config + + +echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 ++++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 +@@ -234,6 +234,7 @@ + + template + friend class Net; ++ virtual ~Callback(){} + }; + const vector& before_forward() const { return before_forward_; } + void add_before_forward(Callback* value) { +">/tmp/cleanup_caffe.diff + +patch < /tmp/cleanup_caffe.diff + + +make -j 6 + +make pycaffe + +make distribute +``` + + +```bash +#!/bin/bash +cd $OPENCV_BUILD_DIR #You must set this +CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04 + +cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" 
-DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./ + + ``` From be395e59814a32f4ff856e295a076c596de06a6e Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Wed, 19 Jul 2017 16:58:11 +0200 Subject: [PATCH 06/31] Modified the class heirarchy --- modules/text/include/opencv2/text/ocr.hpp | 4 - .../include/opencv2/text/textDetector.hpp | 124 ++++++- modules/text/src/ocr_holistic.cpp | 82 ++--- modules/text/src/text_detector.cpp | 12 +- modules/text/src/text_detectorCNN.cpp | 343 ++++++++++++++++++ 5 files changed, 509 insertions(+), 56 deletions(-) create mode 100644 modules/text/src/text_detectorCNN.cpp diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 9593a1415fd..bd1c18ffb11 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -716,10 +716,6 @@ class CV_EXPORTS_W TextImageClassifier /** @brief produces a class confidence row-vector given an image */ CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0; - /** @brief produces a list of bounding box given an image - */ - - CV_WRAP virtual void detect(InputArray image, OutputArray classProbabilities) = 0; /** @brief produces a matrix containing class confidence row-vectors given an collection of images */ diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index ea1c7de9d4b..efbec6bffa9 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -65,19 +65,131 @@ namespace text //detection scenario class CV_EXPORTS_W BaseDetector { - public: +public: virtual ~BaseDetector() {}; virtual void run(Mat& image, - std::vector* component_rects=NULL, + std::vector* 
component_rects=NULL, std::vector* component_confidences=NULL, int component_level=0) = 0; virtual void run(Mat& image, Mat& mask, - std::vector* component_rects=NULL, + std::vector* component_rects=NULL, std::vector* component_confidences=NULL, int component_level=0) = 0; +}; +/** A virtual class for different models of text detection (including CNN based deep models) + */ + +class CV_EXPORTS_W TextRegionDetector +{ +protected: + /** Stores input and output size + */ + //netGeometry inputGeometry_; + //netGeometry outputGeometry_; + Size inputGeometry_; + Size outputGeometry_; + int inputChannelCount_; + int outputChannelCount_; + +public: + virtual ~TextRegionDetector() {} + + /** @brief produces a list of Bounding boxes and an estimate of text-ness confidence of Bounding Boxes + */ + CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0; + + + /** @brief simple getter method returning the size (height, width) of the input sample + */ + CV_WRAP virtual Size getInputGeometry(){return this->inputGeometry_;} + + /** @brief simple getter method returning the shape of the output + * Any text detector should output a number of text regions along with a score of text-ness + * From the shape it can be inferred the number of text regions and number of returned values + * for each region + */ + CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;} + + + +}; + +/** Generic structure of Deep CNN based Text Detectors + * */ +class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector +{ + /** @brief Class that uses a pretrained caffe model for text detection.
+ * Any text detection should + * This network is described in detail in: + * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network + * https://arxiv.org/abs/1611.06779 + */ +protected: + /** all deep CNN based text detectors have a preprocessor (normally) + */ + Ptr preprocessor_; + /** @brief all image preprocessing is handled here including whitening etc. + * + * @param input the image to be preprocessed for the classifier. If the depth + * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1] + * + * @param output reference to the image to be fed to the classifier, the preprocessor will + * resize the image to the appropriate size and convert it to the appropriate depth\ + * + * The method preprocess should never be used externally, it is up to classify and classifyBatch + * methods to employ it. + */ + virtual void preprocess(const Mat& input,Mat& output); +public: + virtual ~DeepCNNTextDetector() {}; + + /** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model + * + * @param archFilename is the path to the prototxt file containing the deployment model architecture description. + * + * @param weightsFilename is the path to the pretrained weights of the model in binary form. + * + * @param preprocessor is a pointer to the instance of an ImagePreprocessor implementing the preprocess_ protected method; + * + * @param minibatchSz the maximum number of samples that can be processed in parallel. In practice this parameter + * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU. + * + * @param backEnd integer parameter selecting the computation framework.
For now OCR_HOLISTIC_BACKEND_CAFFE is + * the only option + */ + CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE); + + /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection. + * + * This method loads a pretrained classifier and couples with a preprocessor that preprocesses the image with mean subtraction of () + * The architecture and model weights can be downloaded from: + * https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB) + + * @param archFilename is the path to the prototxt file containing the deployment model architecture description. + * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt". + * + * @param weightsFilename is the path to the pretrained weights of the model. When employing + * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. + * + * @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is + * the only option + */ + CV_WRAP static Ptr createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE); + friend class ImagePreprocessor; + +}; + +/** @brief textDetector class provides the functionality of text bounding box detection. + * A TextRegionDetector is employed to find bounding boxes of text + * words given an input image.
+ * + * This class implements the logic of providing text bounding boxes in a vector of rects given an TextRegionDetector + * The TextRegionDetector can be any text detector + * + */ class CV_EXPORTS_W textDetector : public BaseDetector { @@ -125,9 +237,9 @@ class CV_EXPORTS_W textDetector : public BaseDetector - /** @brief simple getter for the preprocessing functor + /** @brief simple getter for the preprocessing functor */ - CV_WRAP virtual Ptr getClassifier()=0; + CV_WRAP virtual Ptr getClassifier()=0; /** @brief Creates an instance of the textDetector class. @@ -135,7 +247,7 @@ class CV_EXPORTS_W textDetector : public BaseDetector */ - CV_WRAP static Ptr create(Ptr classifierPtr); + CV_WRAP static Ptr create(Ptr classifierPtr); /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier. diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp index 9791e62bbf5..ae73b04dc86 100644 --- a/modules/text/src/ocr_holistic.cpp +++ b/modules/text/src/ocr_holistic.cpp @@ -459,53 +459,53 @@ class DeepCNNCaffeImpl: public DeepCNN{ #endif } - void process_(Mat inputImage, Mat &outputMat) - { - // do forward pass and stores the output in outputMat - //Process one image - CV_Assert(this->minibatchSz_==1); - //CV_Assert(outputMat.isContinuous()); +// void process_(Mat inputImage, Mat &outputMat) +// { +// // do forward pass and stores the output in outputMat +// //Process one image +// CV_Assert(this->minibatchSz_==1); +// //CV_Assert(outputMat.isContinuous()); -#ifdef HAVE_CAFFE - net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); - net_->Reshape(); - float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); - float* inputData=inputBuffer; +//#ifdef HAVE_CAFFE +// net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); +// net_->Reshape(); +// float* 
inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); +// float* inputData=inputBuffer; - std::vector input_channels; - Mat preprocessed; - // if the image have multiple color channels the input layer should be populated accordingly - for (int channel=0;channel < this->channelCount_;channel++){ +// std::vector input_channels; +// Mat preprocessed; +// // if the image have multiple color channels the input layer should be populated accordingly +// for (int channel=0;channel < this->channelCount_;channel++){ - cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); - input_channels.push_back(netInputWraped); - //input_data += width * height; - inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); - } - this->preprocess(inputImage,preprocessed); - split(preprocessed, input_channels); +// cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); +// input_channels.push_back(netInputWraped); +// //input_data += width * height; +// inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); +// } +// this->preprocess(inputImage,preprocessed); +// split(preprocessed, input_channels); - //preprocessed.copyTo(netInputWraped); +// //preprocessed.copyTo(netInputWraped); - this->net_->Forward(); - const float* outputNetData=net_->output_blobs()[0]->cpu_data(); - // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); +// this->net_->Forward(); +// const float* outputNetData=net_->output_blobs()[0]->cpu_data(); +// // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); - this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); - int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; - outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); - float*outputMatData=(float*)(outputMat.data); +// this->outputGeometry_ = 
Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); +// int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; +// outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); +// float*outputMatData=(float*)(outputMat.data); - memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); +// memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); -#endif - } +//#endif +// } @@ -587,15 +587,15 @@ class DeepCNNCaffeImpl: public DeepCNN{ inputImageList.push_back(image.getMat()); classifyBatch(inputImageList,classProbabilities); } - void detect(InputArray image, OutputArray Bbox_prob) - { +// void detect(InputArray image, OutputArray Bbox_prob) +// { - Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed - Mat outputMat = Bbox_prob.getMat(); - process_(image.getMat(),outputMat); - //copy back to outputArray - outputMat.copyTo(Bbox_prob); - } +// Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed +// Mat outputMat = Bbox_prob.getMat(); +// process_(image.getMat(),outputMat); +// //copy back to outputArray +// outputMat.copyTo(Bbox_prob); +// } void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) { diff --git a/modules/text/src/text_detector.cpp b/modules/text/src/text_detector.cpp index 5b18e970861..1b979c253bf 100644 --- a/modules/text/src/text_detector.cpp +++ b/modules/text/src/text_detector.cpp @@ -23,6 +23,8 @@ namespace cv { namespace text { + + class textDetectImpl: public textDetector{ private: struct NetOutput{ @@ -60,9 +62,9 @@ class textDetectImpl: public textDetector{ }; protected: - Ptr classifier_; + Ptr classifier_; public: - textDetectImpl(Ptr classifierPtr):classifier_(classifierPtr) + textDetectImpl(Ptr classifierPtr):classifier_(classifierPtr) { } @@ -131,13 +133,13 @@ class textDetectImpl: public textDetector{ - Ptr getClassifier() + Ptr getClassifier() { return 
this->classifier_; } }; -Ptr textDetector::create(Ptr classifierPtr) +Ptr textDetector::create(Ptr classifierPtr) { return Ptr(new textDetectImpl(classifierPtr)); } @@ -155,7 +157,7 @@ Ptr textDetector::create(String modelArchFilename, String modelWei textbox_mean.at(0,2)=123; preprocessor->set_mean(textbox_mean); // create a pointer to text box detector(textDetector) - Ptr classifierPtr(DeepCNN::create(modelArchFilename,modelWeightsFilename,preprocessor,1)); + Ptr classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1)); return Ptr(new textDetectImpl(classifierPtr)); } diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp new file mode 100644 index 00000000000..b48e97e7cd2 --- /dev/null +++ b/modules/text/src/text_detectorCNN.cpp @@ -0,0 +1,343 @@ +#include "precomp.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/core.hpp" + + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef HAVE_CAFFE +#include "caffe/caffe.hpp" +#endif +namespace cv { namespace text { + +inline bool fileExists (String filename) { + std::ifstream f(filename.c_str()); + return f.good(); +} + +//************************************************************************************ +//****************** TextImageClassifier ***************************************** +//************************************************************************************ + +//void TextImageClassifier::preprocess(const Mat& input,Mat& output) +//{ +// this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_); +//} + +//void TextImageClassifier::setPreprocessor(Ptr ptr) +//{ +// CV_Assert(!ptr.empty()); +// preprocessor_=ptr; +//} + +//Ptr TextImageClassifier::getPreprocessor() +//{ +// return preprocessor_; +//} + + +class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ +protected: + + + void process_(Mat inputImage, Mat &outputMat) + 
{ + // do forward pass and stores the output in outputMat + //Process one image + // CV_Assert(this->outputGeometry_.batchSize==1); + //CV_Assert(outputMat.isContinuous()); + +#ifdef HAVE_CAFFE + net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width); + net_->Reshape(); + float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); + float* inputData=inputBuffer; + + std::vector input_channels; + Mat preprocessed; + // if the image have multiple color channels the input layer should be populated accordingly + for (int channel=0;channel < this->inputChannelCount_;channel++){ + + cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); + input_channels.push_back(netInputWraped); + //input_data += width * height; + inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); + } + this->preprocess(inputImage,preprocessed); + split(preprocessed, input_channels); + + //preprocessed.copyTo(netInputWraped); + + + this->net_->Forward(); + const float* outputNetData=net_->output_blobs()[0]->cpu_data(); + // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); + + + + + this->outputGeometry_.height = net_->output_blobs()[0]->height(); + this->outputGeometry_.width = net_->output_blobs()[0]->width(); + this->outputChannelCount_ = net_->output_blobs()[0]->channels(); + int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; + outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); + float*outputMatData=(float*)(outputMat.data); + + memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); + + + +#endif + } + + + +#ifdef HAVE_CAFFE + Ptr > net_; +#endif + //Size inputGeometry_; + int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst + //int outputSize_; +public: + DeepCNNTextDetectorCaffeImpl(const DeepCNNTextDetectorCaffeImpl& dn): + 
minibatchSz_(dn.minibatchSz_){ + outputGeometry_=dn.outputGeometry_; + inputGeometry_=dn.inputGeometry_; + //Implemented to supress Visual Studio warning "assignment operator could not be generated" +#ifdef HAVE_CAFFE + this->net_=dn.net_; +#endif + } + DeepCNNTextDetectorCaffeImpl& operator=(const DeepCNNTextDetectorCaffeImpl &dn) + { +#ifdef HAVE_CAFFE + this->net_=dn.net_; +#endif + this->setPreprocessor(dn.preprocessor_); + this->inputGeometry_=dn.inputGeometry_; + this->inputChannelCount_=dn.inputChannelCount_; + this->outputChannelCount_ = dn.outputChannelCount_; + // this->minibatchSz_=dn.minibatchSz_; + //this->outputGeometry_=dn.outputSize_; + this->preprocessor_=dn.preprocessor_; + this->outputGeometry_=dn.outputGeometry_; + return *this; + //Implemented to supress Visual Studio warning "assignment operator could not be generated" + } + + DeepCNNTextDetectorCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) + :minibatchSz_(maxMinibatchSz) + { + + CV_Assert(this->minibatchSz_>0); + CV_Assert(fileExists(modelArchFilename)); + CV_Assert(fileExists(modelWeightsFilename)); + CV_Assert(!preprocessor.empty()); + this->setPreprocessor(preprocessor); +#ifdef HAVE_CAFFE + this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); + CV_Assert(net_->num_inputs()==1); + CV_Assert(net_->num_outputs()==1); + CV_Assert(this->net_->input_blobs()[0]->channels()==1 + ||this->net_->input_blobs()[0]->channels()==3); + // this->channelCount_=this->net_->input_blobs()[0]->channels(); + + + + this->net_->CopyTrainedLayersFrom(modelWeightsFilename); + + caffe::Blob* inputLayer = this->net_->input_blobs()[0]; + + this->inputGeometry_.height = inputLayer->height(); + this->inputGeometry_.width = inputLayer->width(); + this->inputChannelCount_ = inputLayer->channels(); + //this->inputGeometry_.batchSize =1; + + inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, 
this->inputGeometry_.width); + net_->Reshape(); + this->outputChannelCount_ = net_->output_blobs()[0]->channels(); + //this->outputGeometry_.batchSize =1; + this->outputGeometry_.height =net_->output_blobs()[0]->height(); + this->outputGeometry_.width = net_->output_blobs()[0]->width(); + + + + + +#else + CV_Error(Error::StsError,"Caffe not available during compilation!"); +#endif + } + + + void detect(InputArray image, OutputArray Bbox_prob) + { + Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); + Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed + Mat outputMat = Bbox_prob.getMat(); + process_(image.getMat(),outputMat); + //copy back to outputArray + outputMat.copyTo(Bbox_prob); + } + + + + //int getOutputSize() + //{ + // return this->outputSize_; + //} + Size getOutputGeometry() + { + return this->outputGeometry_; + } + Size getinputGeometry() + { + return this->inputGeometry_; + } + + int getMinibatchSize() + { + return this->minibatchSz_; + } + + int getBackend() + { + return OCR_HOLISTIC_BACKEND_CAFFE; + } + void setPreprocessor(Ptr ptr) + { + CV_Assert(!ptr.empty()); + preprocessor_=ptr; + } + + Ptr getPreprocessor() + { + return preprocessor_; + } +}; + + +Ptr DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz,int backEnd) +{ + if(preprocessor.empty()) + { + // create a custom preprocessor with rawval + Ptr preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); + // set the mean for the preprocessor + + Mat textbox_mean(1,3,CV_8U); + textbox_mean.at(0,0)=104; + textbox_mean.at(0,1)=117; + textbox_mean.at(0,2)=123; + preprocessor->set_mean(textbox_mean); + } + switch(backEnd){ + case OCR_HOLISTIC_BACKEND_CAFFE: + + return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + break; + case OCR_HOLISTIC_BACKEND_NONE: + default: + CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + return 
Ptr(); + break; + } + return Ptr(); + +} + + +Ptr DeepCNNTextDetector::createTextBoxNet(String archFilename,String weightsFilename,int backEnd) +{ + + // create a custom preprocessor with rawval + Ptr preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); + // set the mean for the preprocessor + + Mat textbox_mean(1,3,CV_8U); + textbox_mean.at(0,0)=104; + textbox_mean.at(0,1)=117; + textbox_mean.at(0,2)=123; + preprocessor->set_mean(textbox_mean); + switch(backEnd){ + case OCR_HOLISTIC_BACKEND_CAFFE: + return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); + break; + case OCR_HOLISTIC_BACKEND_NONE: + default: + CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + return Ptr(); + break; + } + return Ptr(); + +} + +void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output) +{ + Size inputHtWd = Size(this->inputGeometry_.height,this->inputGeometry_.width); + this->preprocessor_->preprocess(input,output,inputHtWd,this->inputChannelCount_); +} + +//namespace cnn_config{ +//namespace caffe_backend{ + +//#ifdef HAVE_CAFFE + +//bool getCaffeGpuMode() +//{ +// return caffe::Caffe::mode()==caffe::Caffe::GPU; +//} + +//void setCaffeGpuMode(bool useGpu) +//{ +// if(useGpu) +// { +// caffe::Caffe::set_mode(caffe::Caffe::GPU); +// }else +// { +// caffe::Caffe::set_mode(caffe::Caffe::CPU); +// } +//} + +//bool getCaffeAvailable() +//{ +// return true; +//} + +//#else + +//bool getCaffeGpuMode() +//{ +// CV_Error(Error::StsError,"Caffe not available during compilation!"); +// return 0; +//} + +//void setCaffeGpuMode(bool useGpu) +//{ +// CV_Error(Error::StsError,"Caffe not available during compilation!"); +// CV_Assert(useGpu==1);//Compilation directives force +//} + +//bool getCaffeAvailable(){ +// return 0; +//} + +//#endif + +//}//namespace caffe +//}//namespace cnn_config + +} } //namespace text namespace cv + From 1bc908bdbd0b5f95c729d81d24b4862d9ff40f3e Mon Sep 17 00:00:00 2001 From: sghoshcvc 
Date: Wed, 19 Jul 2017 18:57:16 +0200 Subject: [PATCH 07/31] Added python sample script --- modules/text/samples/deeptextdetection.py | 59 +++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 modules/text/samples/deeptextdetection.py diff --git a/modules/text/samples/deeptextdetection.py b/modules/text/samples/deeptextdetection.py new file mode 100644 index 00000000000..e2f67a3f113 --- /dev/null +++ b/modules/text/samples/deeptextdetection.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Jul 19 17:54:00 2017 + +@author: sgnosh +""" + +#!/usr/bin/python + +import sys +import os + +import cv2 +import numpy as np + +print('\nDeeptextdetection.py') +print(' A demo script of text box alogorithm of the paper:') +print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') + + +if (len(sys.argv) < 2): + print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') + quit() +#if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable(): +# print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n" +# +# quit() +# check model and architecture file existance +if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): + print " Model files not found in current directory. 
Aborting" + print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" + + quit() + +cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True); +pathname = os.path.dirname(sys.argv[0]) + + +img = cv2.imread(str(sys.argv[1])) +textSpotter=cv2.text.textDetector_create( + "textbox_deploy.prototxt","textbox.caffemodel") +rects,outProbs = textSpotter.textDetectInImage(img); +# for visualization +vis = img.copy() +# Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown +thres = 0.6 + + + #Visualization +for r in range(0,np.shape(rects)[0]): + if outProbs[r] >thres: + rect = rects[r] + cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2) + # cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1) + + +#Visualization +cv2.imshow("Text detection result", vis) +cv2.waitKey(0) \ No newline at end of file From 73ddeab66f1d7c92458c0f60bfce23bea6eb13a4 Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Wed, 19 Jul 2017 19:01:30 +0200 Subject: [PATCH 08/31] simple cleaning and added comments --- .../text/include/opencv2/text/erfilter.hpp | 1 + modules/text/src/ocr_holistic.cpp | 59 -------------- modules/text/src/text_detector.cpp | 7 +- modules/text/src/text_detectorCNN.cpp | 80 +------------------ 4 files changed, 6 insertions(+), 141 deletions(-) diff --git a/modules/text/include/opencv2/text/erfilter.hpp b/modules/text/include/opencv2/text/erfilter.hpp index af983c6c168..84d72d2a0a4 100644 --- a/modules/text/include/opencv2/text/erfilter.hpp +++ b/modules/text/include/opencv2/text/erfilter.hpp @@ -65,6 +65,7 @@ component tree of the image. : */ struct CV_EXPORTS ERStat { + public: //! 
Constructor explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0); diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp index ae73b04dc86..670d1a2154f 100644 --- a/modules/text/src/ocr_holistic.cpp +++ b/modules/text/src/ocr_holistic.cpp @@ -459,56 +459,6 @@ class DeepCNNCaffeImpl: public DeepCNN{ #endif } -// void process_(Mat inputImage, Mat &outputMat) -// { -// // do forward pass and stores the output in outputMat -// //Process one image -// CV_Assert(this->minibatchSz_==1); -// //CV_Assert(outputMat.isContinuous()); - -//#ifdef HAVE_CAFFE -// net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); -// net_->Reshape(); -// float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); -// float* inputData=inputBuffer; - -// std::vector input_channels; -// Mat preprocessed; -// // if the image have multiple color channels the input layer should be populated accordingly -// for (int channel=0;channel < this->channelCount_;channel++){ - -// cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); -// input_channels.push_back(netInputWraped); -// //input_data += width * height; -// inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); -// } -// this->preprocess(inputImage,preprocessed); -// split(preprocessed, input_channels); - -// //preprocessed.copyTo(netInputWraped); - - -// this->net_->Forward(); -// const float* outputNetData=net_->output_blobs()[0]->cpu_data(); -// // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); - - - - -// this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); -// int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; -// outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); -// float*outputMatData=(float*)(outputMat.data); - -// 
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); - - - -//#endif -// } - - - #ifdef HAVE_CAFFE Ptr > net_; #endif @@ -587,15 +537,6 @@ class DeepCNNCaffeImpl: public DeepCNN{ inputImageList.push_back(image.getMat()); classifyBatch(inputImageList,classProbabilities); } -// void detect(InputArray image, OutputArray Bbox_prob) -// { - -// Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed -// Mat outputMat = Bbox_prob.getMat(); -// process_(image.getMat(),outputMat); -// //copy back to outputArray -// outputMat.copyTo(Bbox_prob); -// } void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) { diff --git a/modules/text/src/text_detector.cpp b/modules/text/src/text_detector.cpp index 1b979c253bf..9b6d4f966a4 100644 --- a/modules/text/src/text_detector.cpp +++ b/modules/text/src/text_detector.cpp @@ -74,7 +74,7 @@ class textDetectImpl: public textDetector{ void textDetectInImage(InputArray inputImage,CV_OUT std::vector& Bbox,CV_OUT std::vector& confidence) { Mat netOutput; - // call the detect function of deepCNN class + // call the detect function of deepTextCNN class this->classifier_->detect(inputImage,netOutput); // get the output geometry i.e height and width of output blob from caffe Size OutputGeometry_ = this->classifier_->getOutputGeometry(); @@ -102,12 +102,11 @@ class textDetectImpl: public textDetector{ int component_level=0) { CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting - //double confidence; - //String transcription; + std::vector bbox; std::vector score; textDetectInImage(image,bbox,score); - //output_text=transcription.c_str(); + if(component_rects!=NULL) { component_rects->resize(bbox.size()); // should be a user behavior diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index b48e97e7cd2..14cdaeb3887 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp 
@@ -26,27 +26,6 @@ inline bool fileExists (String filename) { return f.good(); } -//************************************************************************************ -//****************** TextImageClassifier ***************************************** -//************************************************************************************ - -//void TextImageClassifier::preprocess(const Mat& input,Mat& output) -//{ -// this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_); -//} - -//void TextImageClassifier::setPreprocessor(Ptr ptr) -//{ -// CV_Assert(!ptr.empty()); -// preprocessor_=ptr; -//} - -//Ptr TextImageClassifier::getPreprocessor() -//{ -// return preprocessor_; -//} - - class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ protected: @@ -54,9 +33,7 @@ class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ void process_(Mat inputImage, Mat &outputMat) { // do forward pass and stores the output in outputMat - //Process one image - // CV_Assert(this->outputGeometry_.batchSize==1); - //CV_Assert(outputMat.isContinuous()); + #ifdef HAVE_CAFFE net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width); @@ -191,12 +168,6 @@ class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ outputMat.copyTo(Bbox_prob); } - - - //int getOutputSize() - //{ - // return this->outputSize_; - //} Size getOutputGeometry() { return this->outputGeometry_; @@ -290,54 +261,7 @@ void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output) this->preprocessor_->preprocess(input,output,inputHtWd,this->inputChannelCount_); } -//namespace cnn_config{ -//namespace caffe_backend{ - -//#ifdef HAVE_CAFFE - -//bool getCaffeGpuMode() -//{ -// return caffe::Caffe::mode()==caffe::Caffe::GPU; -//} - -//void setCaffeGpuMode(bool useGpu) -//{ -// if(useGpu) -// { -// caffe::Caffe::set_mode(caffe::Caffe::GPU); -// }else -// { -// caffe::Caffe::set_mode(caffe::Caffe::CPU); -// } 
-//} - -//bool getCaffeAvailable() -//{ -// return true; -//} - -//#else - -//bool getCaffeGpuMode() -//{ -// CV_Error(Error::StsError,"Caffe not available during compilation!"); -// return 0; -//} - -//void setCaffeGpuMode(bool useGpu) -//{ -// CV_Error(Error::StsError,"Caffe not available during compilation!"); -// CV_Assert(useGpu==1);//Compilation directives force -//} - -//bool getCaffeAvailable(){ -// return 0; -//} - -//#endif - -//}//namespace caffe -//}//namespace cnn_config + } } //namespace text namespace cv From 8cf800e650522e6f78070aa4224880c334181a16 Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Fri, 21 Jul 2017 03:09:06 +0200 Subject: [PATCH 09/31] fix a dependency bug --- modules/text/src/precomp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/text/src/precomp.hpp b/modules/text/src/precomp.hpp index 94f05d8cc99..c7371db1e79 100644 --- a/modules/text/src/precomp.hpp +++ b/modules/text/src/precomp.hpp @@ -45,7 +45,7 @@ #include "opencv2/text.hpp" -#include "text_config.hpp" +//#include "text_config.hpp" #ifdef HAVE_TESSERACT #include From a617059f24bef66c160606b6952ebd7154a31d7f Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Fri, 21 Jul 2017 13:11:58 +0200 Subject: [PATCH 10/31] removed Java Wrapper --- modules/text/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/text/CMakeLists.txt b/modules/text/CMakeLists.txt index 861848f704a..a8a32326f52 100644 --- a/modules/text/CMakeLists.txt +++ b/modules/text/CMakeLists.txt @@ -31,7 +31,7 @@ else() message(STATUS "Glog: NO") endif() -ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d WRAP python java) +ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d WRAP python) #ocv_define_module(text ${TEXT_DEPS} WRAP python) #set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) From ca2a2abed0bdb56d796b144505591a083a40a6a3 Mon Sep 17 00:00:00 2001 
From: sghoshcvc Date: Sat, 22 Jul 2017 00:24:17 +0200 Subject: [PATCH 11/31] Removed white space errors and platform specific warnings --- modules/text/CMakeLists.txt | 1 - modules/text/FindTesseract.cmake | 4 +-- modules/text/README.md | 8 ++--- modules/text/include/opencv2/text/ocr.hpp | 36 ++++++++++------------- modules/text/samples/deeptextdetection.py | 3 +- modules/text/samples/textbox_demo.cpp | 4 +-- modules/text/src/text_detectorCNN.cpp | 9 ++++-- 7 files changed, 31 insertions(+), 34 deletions(-) diff --git a/modules/text/CMakeLists.txt b/modules/text/CMakeLists.txt index a8a32326f52..5d5a52b4ad6 100644 --- a/modules/text/CMakeLists.txt +++ b/modules/text/CMakeLists.txt @@ -67,4 +67,3 @@ if() else() message(STATUS "TEXT CAFFE CONFLICT") endif() - diff --git a/modules/text/FindTesseract.cmake b/modules/text/FindTesseract.cmake index 54c4a49297d..4622ece142e 100644 --- a/modules/text/FindTesseract.cmake +++ b/modules/text/FindTesseract.cmake @@ -19,6 +19,4 @@ find_library(Lept_LIBRARY NAMES lept set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY}) if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR) set(Tesseract_FOUND 1) -endif() - - +endif() diff --git a/modules/text/README.md b/modules/text/README.md index a82bef20f06..2caf58a1e17 100644 --- a/modules/text/README.md +++ b/modules/text/README.md @@ -66,7 +66,7 @@ Instalation of Caffe backend The caffe wrapping backend has the requirements caffe does. * Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises. The simplest solution is to build caffe without support for OpenCV. -* Only the OS supported by Caffe are supported by the backend. +* Only the OS supported by Caffe are supported by the backend. The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. Other UNIX systems including OSX should be easy to adapt. 
@@ -90,7 +90,7 @@ echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 +++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 @@ -234,6 +234,7 @@ - + template friend class Net; + virtual ~Callback(){} @@ -138,7 +138,7 @@ Instalation of Caffe backend The caffe wrapping backend has the requirements caffe does. * Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises. The simplest solution is to build caffe without support for OpenCV. -* Only the OS supported by Caffe are supported by the backend. +* Only the OS supported by Caffe are supported by the backend. The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. Other UNIX systems including OSX should be easy to adapt. @@ -160,7 +160,7 @@ echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 +++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 @@ -234,6 +234,7 @@ - + template friend class Net; + virtual ~Callback(){} diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 8030fcb63e9..e01a16f7275 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -91,7 +91,7 @@ enum ocr_engine_mode }; //base class BaseOCR declares a common API that would be used in a typical text recognition scenario - + class CV_EXPORTS_W BaseOCR { public: @@ -188,7 +188,7 @@ class CV_EXPORTS_W OCRTesseract : public BaseOCR /** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract. - + @param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the system's default directory. @param language an ISO 639-3 code or NULL will default to "eng". 
@@ -277,8 +277,7 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { * for the individual text elements found (e.g. words). * @param component_texts If provided the method will output a list of text - * strings for the recognition of individual text elements found (e.g. words) - * . + * strings for the recognition of individual text elements found (e.g. words). * @param component_confidences If provided the method will output a list of * confidence values for the recognition of individual text elements found @@ -314,8 +313,7 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { * for the individual text elements found (e.g. words). * @param component_texts If provided the method will output a list of text - * strings for the recognition of individual text elements found (e.g. words) - * . + * strings for the recognition of individual text elements found (e.g. words). * @param component_confidences If provided the method will output a list of * confidence values for the recognition of individual text elements found @@ -596,34 +594,32 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int beam_size = 500); // Size of the beam in Beam Search algorithm - /** @brief This method allows to plug a classifier that is derivative of TextImageClassifier in to - * OCRBeamSearchDecoder as a ClassifierCallback. - @param classifier A pointer to a TextImageClassifier decendent - @param alphabet The language alphabet one char per symbol. alphabet.size() must be equal to the number of classes - of the classifier. In future editinons it should be replaced with a vector of strings. + + + /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder from the specified path. + + @overload + + @param filename path to a character classifier file + + @param vocabulary The language vocabulary (chars when ASCII English text). 
vocabulary.size() + must be equal to the number of classes of the classifier.. @param transition_probabilities_table Table with transition probabilities between character - pairs. cols == rows == alphabet.size(). + pairs. cols == rows == vocabulary.size(). @param emission_probabilities_table Table with observation emission probabilities. cols == - rows == alphabet.size(). + rows == vocabulary.size(). @param windowWidth The width of the windows to which the sliding window will be iterated. The height will be the height of the image. The windows might be resized to fit the classifiers input by the classifiers preprocessor. - @param windowStep The step for the sliding window - @param mode HMM Decoding algorithm (only Viterbi for the moment) @param beam_size Size of the beam in Beam Search algorithm - */ - - /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder from the specified path. - - @overload */ CV_WRAP static Ptr create(const String& filename, // The character classifier file diff --git a/modules/text/samples/deeptextdetection.py b/modules/text/samples/deeptextdetection.py index e2f67a3f113..8bc7a642255 100644 --- a/modules/text/samples/deeptextdetection.py +++ b/modules/text/samples/deeptextdetection.py @@ -25,11 +25,10 @@ # print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n" # # quit() -# check model and architecture file existance +# check model and architecture file existance if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): print " Model files not found in current directory. 
Aborting" print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" - quit() cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True); diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index a4155893543..e36015831cf 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -61,7 +61,7 @@ int main(int argc, const char * argv[]){ exit(1); } //set to true if you have a GPU with more than 3GB - cv::text::cnn_config::caffe_backend::setCaffeGpuMode(false); + cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true); if (argc < 3){ std::cout< outProbabillities; textSpotter->textDetectInImage(image,bbox,outProbabillities); // textbox_draw(image, bbox,outProbabillities); - float thres =0.6; + float thres =0.6f; std::vector imageList; for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){ if(outProbabillities[imageIdx]>thres){ diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 14cdaeb3887..cf3a0c8baa0 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -19,6 +19,9 @@ #ifdef HAVE_CAFFE #include "caffe/caffe.hpp" #endif + +#define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__) + namespace cv { namespace text { inline bool fileExists (String filename) { @@ -33,6 +36,9 @@ class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ void process_(Mat inputImage, Mat &outputMat) { // do forward pass and stores the output in outputMat + CV_Assert(outputMat.isContinuous()); + if (inputImage.channels() != this->inputChannelCount_) + CV_WARN("Number of input channel(s) in the model is not same as input"); #ifdef HAVE_CAFFE @@ -204,7 +210,7 @@ Ptr DeepCNNTextDetector::create(String archFilename,String if(preprocessor.empty()) { // create a custom preprocessor with rawval - Ptr 
preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); + preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); // set the mean for the preprocessor Mat textbox_mean(1,3,CV_8U); @@ -264,4 +270,3 @@ void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output) } } //namespace text namespace cv - From b913cac1df768f615b31c8bb70a87217d08cba53 Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Sat, 22 Jul 2017 19:13:41 +0200 Subject: [PATCH 12/31] Fixed Doxygen Warning and error --- modules/text/FindTesseract.cmake | 2 +- modules/text/include/opencv2/text/ocr.hpp | 4 ---- modules/text/samples/deeptextdetection.py | 4 ++-- modules/text/samples/textbox_demo.cpp | 4 +--- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/modules/text/FindTesseract.cmake b/modules/text/FindTesseract.cmake index 4622ece142e..01835e61bc7 100644 --- a/modules/text/FindTesseract.cmake +++ b/modules/text/FindTesseract.cmake @@ -19,4 +19,4 @@ find_library(Lept_LIBRARY NAMES lept set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY}) if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR) set(Tesseract_FOUND 1) -endif() +endif() diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index e01a16f7275..258273f710e 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -613,10 +613,6 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ @param emission_probabilities_table Table with observation emission probabilities. cols == rows == vocabulary.size(). - @param windowWidth The width of the windows to which the sliding window will be iterated. The height will - be the height of the image. The windows might be resized to fit the classifiers input by the classifiers - preprocessor. 
- @param mode HMM Decoding algorithm (only Viterbi for the moment) @param beam_size Size of the beam in Beam Search algorithm diff --git a/modules/text/samples/deeptextdetection.py b/modules/text/samples/deeptextdetection.py index 8bc7a642255..060fbacacab 100644 --- a/modules/text/samples/deeptextdetection.py +++ b/modules/text/samples/deeptextdetection.py @@ -25,12 +25,12 @@ # print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n" # # quit() -# check model and architecture file existance +# check model and architecture file existance if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): print " Model files not found in current directory. Aborting" print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" quit() - + cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True); pathname = os.path.dirname(sys.argv[0]) diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index e36015831cf..49d9b6a792a 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -17,6 +17,7 @@ #include #include +void textbox_draw(cv::Mat &src, std::vector &groups,std::vector &probs,std::vector wordList,float thres=0.6); inline std::string getHelpStr(std::string progFname){ std::stringstream out; out << " Demo of text detection CNN for text detection." << std::endl; @@ -140,7 +141,4 @@ int main(int argc, const char * argv[]){ std::cout << "Press any key to exit." 
<< std::endl << std::endl; if ((cv::waitKey()&0xff) == ' ') return 0; - - } - From 4c9af581335e867e8494a2f7958c89c1f18fe73b Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Sat, 22 Jul 2017 19:24:39 +0200 Subject: [PATCH 13/31] Fixed Text box demo error --- modules/text/samples/textbox_demo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index 49d9b6a792a..8dbf2469264 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -17,7 +17,7 @@ #include #include -void textbox_draw(cv::Mat &src, std::vector &groups,std::vector &probs,std::vector wordList,float thres=0.6); +void textbox_draw(cv::Mat &src, std::vector &groups,std::vector &probs,std::vector wordList,float thres); inline std::string getHelpStr(std::string progFname){ std::stringstream out; out << " Demo of text detection CNN for text detection." << std::endl; From 103fbaf4f2933f99cbb92a3cd7be0b3d1ad819c5 Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Mon, 24 Jul 2017 01:11:01 +0200 Subject: [PATCH 14/31] White Space error in sample python script --- modules/text/samples/deeptextdetection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/text/samples/deeptextdetection.py b/modules/text/samples/deeptextdetection.py index 060fbacacab..2e8395b60f1 100644 --- a/modules/text/samples/deeptextdetection.py +++ b/modules/text/samples/deeptextdetection.py @@ -30,7 +30,6 @@ print " Model files not found in current directory. 
Aborting" print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" quit() - cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True); pathname = os.path.dirname(sys.argv[0]) From 0e74d63d2f894731aec7b7644be8ad042801e979 Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Mon, 24 Jul 2017 01:16:15 +0200 Subject: [PATCH 15/31] Modified to handle windows warning --- modules/text/src/text_detectorCNN.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index cf3a0c8baa0..3865e186c7b 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -230,7 +230,7 @@ Ptr DeepCNNTextDetector::create(String archFilename,String return Ptr(); break; } - return Ptr(); + //return Ptr(); } @@ -257,7 +257,7 @@ Ptr DeepCNNTextDetector::createTextBoxNet(String archFilena return Ptr(); break; } - return Ptr(); + //return Ptr(); } From 111b3bed7d50f6e39ffd912cca1b761d8c21009a Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Mon, 24 Jul 2017 13:29:45 +0200 Subject: [PATCH 16/31] Modified to silent Clang warnings --- modules/text/include/opencv2/text/ocr.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 258273f710e..15db8de8231 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -165,6 +165,7 @@ class CV_EXPORTS_W OCRTesseract : public BaseOCR @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE. */ + using BaseOCR::run; virtual void run (Mat& image, std::string& output_text, std::vector* component_rects=NULL, std::vector* component_texts=NULL, @@ -285,6 +286,7 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { * @param component_level Only OCR_LEVEL_WORD is supported. 
*/ + using BaseOCR::run; virtual void run (Mat& image, std::string& output_text, std::vector* component_rects=NULL, std::vector* component_texts=NULL, @@ -542,6 +544,7 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ @param component_level Only OCR_LEVEL_WORD is supported. */ + using BaseOCR::run; virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); From a2cab07193689e4f3552e0c12a0256da030bf22f Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Tue, 22 Aug 2017 11:12:33 +0200 Subject: [PATCH 17/31] DNN backend initial commit --- modules/text/CMakeLists.txt | 10 +- modules/text/include/opencv2/text/ocr.hpp | 11 +- .../include/opencv2/text/textDetector.hpp | 4 +- modules/text/samples/textbox_demo.cpp | 3 +- modules/text/src/ocr_holistic.cpp | 238 ++++++++++++++++ modules/text/src/text_detectorCNN.cpp | 255 +++++++++++++++++- 6 files changed, 511 insertions(+), 10 deletions(-) diff --git a/modules/text/CMakeLists.txt b/modules/text/CMakeLists.txt index 5d5a52b4ad6..f9649ca336f 100644 --- a/modules/text/CMakeLists.txt +++ b/modules/text/CMakeLists.txt @@ -31,7 +31,7 @@ else() message(STATUS "Glog: NO") endif() -ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d WRAP python) +ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python) #ocv_define_module(text ${TEXT_DEPS} WRAP python) #set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) @@ -67,3 +67,11 @@ if() else() message(STATUS "TEXT CAFFE CONFLICT") endif() + +if(HAVE_opencv_dnn) + message(STATUS "dnn module found") + add_definitions(-DHAVE_DNN) + set(HAVE_DNN 1) +else() + message(STATUS "dnn module not found") +endif() diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 15db8de8231..3c739093559 
100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -658,9 +658,12 @@ CV_EXPORTS_W Ptr loadOCRBeamSearchClas //Classifiers should provide diferent backends //For the moment only caffe is implemeted + enum{ - OCR_HOLISTIC_BACKEND_NONE, - OCR_HOLISTIC_BACKEND_CAFFE + OCR_HOLISTIC_BACKEND_NONE, //No back end + OCR_HOLISTIC_BACKEND_DNN, // dnn backend opencv_dnn + OCR_HOLISTIC_BACKEND_CAFFE, // caffe based backend + OCR_HOLISTIC_BACKEND_DEFAULT // to store default value based on environment }; class TextImageClassifier; @@ -831,7 +834,7 @@ class CV_EXPORTS_W DeepCNN:public TextImageClassifier * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is * the only option */ - CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE); + CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); /** @brief Constructs a DeepCNN intended to be used for word spotting. * @@ -853,7 +856,7 @@ class CV_EXPORTS_W DeepCNN:public TextImageClassifier * @param backEnd integer parameter selecting the coputation framework. 
For now OCR_HOLISTIC_BACKEND_CAFFE is * the only option */ - CV_WRAP static Ptr createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE); + CV_WRAP static Ptr createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); }; diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index efbec6bffa9..ad1b53deed3 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -160,7 +160,7 @@ class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is * the only option */ - CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE); + CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection. * @@ -177,7 +177,7 @@ class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector * @param backEnd integer parameter selecting the coputation framework. 
For now OCR_HOLISTIC_BACKEND_CAFFE is * the only option */ - CV_WRAP static Ptr createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE); + CV_WRAP static Ptr createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); friend class ImagePreprocessor; }; diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index 8dbf2469264..75a18a31552 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -59,9 +59,10 @@ void textbox_draw(cv::Mat &src, std::vector &groups,std::vector inputImageList, Mat outputMat) + { + //Classifies a list of images containing at most minibatchSz_ images + CV_Assert(int(inputImageList.size())<=this->minibatchSz_); + CV_Assert(outputMat.isContinuous()); + +#ifdef HAVE_DNN + + std::vector preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class? + + Mat preprocessed; + // preprocesses each image in the inputImageList and push to preprocessedImList + for(size_t imgNum=0;imgNumpreprocess(inputImageList[imgNum],preprocessed); + preProcessedImList.push_back(preprocessed); + } + // set input data blob in dnn::net + net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data"); + + float*outputMatData=(float*)(outputMat.data); + //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ; + Mat outputNet = this->net_->forward(); + outputNet = outputNet.reshape(1, 1); + + float*outputNetData=(float*)(outputNet.data); + + memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size()); + +#endif + } + +#ifdef HAVE_DNN + Ptr net_; +#endif + //Size inputGeometry_; + int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst + int outputSize_; +public: + DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): + 
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ + channelCount_=dn.channelCount_; + inputGeometry_=dn.inputGeometry_; + //Implemented to supress Visual Studio warning "assignment operator could not be generated" +#ifdef HAVE_DNN + this->net_=dn.net_; +#endif + } + DeepCNNOpenCvDNNImpl& operator=(const DeepCNNOpenCvDNNImpl &dn) + { +#ifdef HAVE_DNN + this->net_=dn.net_; +#endif + this->setPreprocessor(dn.preprocessor_); + this->inputGeometry_=dn.inputGeometry_; + this->channelCount_=dn.channelCount_; + this->minibatchSz_=dn.minibatchSz_; + this->outputSize_=dn.outputSize_; + this->preprocessor_=dn.preprocessor_; + this->outputGeometry_=dn.outputGeometry_; + return *this; + //Implemented to supress Visual Studio warning "assignment operator could not be generated" + } + + DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) + :minibatchSz_(maxMinibatchSz) + { + + CV_Assert(this->minibatchSz_>0); + CV_Assert(fileExists(modelArchFilename)); + CV_Assert(fileExists(modelWeightsFilename)); + CV_Assert(!preprocessor.empty()); + this->setPreprocessor(preprocessor); +#ifdef HAVE_DNN + + this->net_ = makePtr(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); + + + + if (this->net_.empty()) + { + std::cerr << "Can't load network by using the following files: " << std::endl; + std::cerr << "prototxt: " << modelArchFilename << std::endl; + std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; + //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl; + //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; + exit(-1); + } +// find a wa to check the followings in cv::dnn ??? 
+// CV_Assert(net_->num_inputs()==1); +// CV_Assert(net_->num_outputs()==1); +// CV_Assert(this->net_->input_blobs()[0]->channels()==1 +// ||this->net_->input_blobs()[0]->channels()==3); +// this->channelCount_=this->net_->input_blobs()[0]->channels(); + + + + //this->net_->CopyTrainedLayersFrom(modelWeightsFilename); + + //caffe::Blob* inputLayer = this->net_->input_blobs()[0]; + //inputLayerId = net_->getLayerId('data'); + + // inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape, + // inputLayerId, + // std::vector* inLayerShapes, + // std::vector* outLayerShapes) const; + // should not be hard coded ideally + + this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height()); + this->channelCount_ = 1;//inputLayer->channels(); + + //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); + //net_->Reshape(); + this->outputSize_=88172 ;//net_->output_blobs()[0]->channels(); + this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); + + + + + + +#else + CV_Error(Error::StsError,"DNN module not available during compilation!"); +#endif + } + + void classify(InputArray image, OutputArray classProbabilities) + { + std::vector inputImageList; + inputImageList.push_back(image.getMat()); + classifyBatch(inputImageList,classProbabilities); + } + + void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) + { + std::vector allImageVector; + inputImageList.getMatVector(allImageVector); + size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic + + size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic + classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); + Mat outputMat = classProbabilities.getMat(); + printf("ekhane"); + for(size_t 
imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); + std::vector::const_iterator from=std::vector::const_iterator(allImageVector.begin()+imgNum); + std::vector::const_iterator to=std::vector::const_iterator(allImageVector.begin()+rangeEnd); + std::vector minibatchInput(from,to); + classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); + + } + + } + + int getOutputSize() + { + return this->outputSize_; + } + Size getOutputGeometry() + { + return this->outputGeometry_; + } + + int getMinibatchSize() + { + return this->minibatchSz_; + } + + int getBackend() + { + return OCR_HOLISTIC_BACKEND_DNN; + } +}; Ptr DeepCNN::create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz,int backEnd) { @@ -587,9 +772,25 @@ Ptr DeepCNN::create(String archFilename,String weightsFilename,Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + +#elif defined(HAVE_DNN) + return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); +#else + CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + return Ptr(); +#endif + break; + case OCR_HOLISTIC_BACKEND_CAFFE: return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); break; + case OCR_HOLISTIC_BACKEND_DNN: + return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + break; case OCR_HOLISTIC_BACKEND_NONE: default: CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); @@ -603,9 +804,25 @@ Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,i { Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); switch(backEnd){ + case OCR_HOLISTIC_BACKEND_DEFAULT: + +#ifdef HAVE_CAFFE + return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); + +#elif defined(HAVE_DNN) + return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100)); +#else + 
CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + return Ptr(); +#endif + break; + case OCR_HOLISTIC_BACKEND_CAFFE: return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); break; + case OCR_HOLISTIC_BACKEND_DNN: + return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100)); + break; case OCR_HOLISTIC_BACKEND_NONE: default: CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); @@ -639,6 +856,27 @@ bool getCaffeAvailable() { return true; } +#elif defined(HAVE_DNN) + +bool getCaffeGpuMode() +{ + CV_Error(Error::StsError,"Caffe not available during compilation!"); + return 0; +} + +void setCaffeGpuMode(bool useGpu) +{ + CV_Error(Error::StsError,"Caffe not available during compilation!"); + CV_Assert(useGpu==1);//Compilation directives force +} + +bool getCaffeAvailable(){ + return 0; +} +bool getDNNAvailable(){ + return true; +} + #else diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 3865e186c7b..a2c583c7f10 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -20,6 +20,12 @@ #include "caffe/caffe.hpp" #endif +#ifdef HAVE_DNN +#include "opencv2/dnn.hpp" +#endif + +using namespace cv::dnn; + #define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__) namespace cv { namespace text { @@ -205,6 +211,220 @@ class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ }; +class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ +protected: + + + void process_(Mat inputImage, Mat &outputMat) + { + // do forward pass and stores the output in outputMat + CV_Assert(outputMat.isContinuous()); + if (inputImage.channels() != this->inputChannelCount_) + CV_WARN("Number of input channel(s) in the model is not same as input"); + + +#ifdef HAVE_DNN + + //std::vector preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing 
class? + + Mat preprocessed; + this->preprocess(inputImage,preprocessed); + printf("After preprocess"); + // preprocesses each image in the inputImageList and push to preprocessedImList +// for(size_t imgNum=0;imgNumpreprocess(inputImageList[imgNum],preprocessed); +// preProcessedImList.push_back(preprocessed); +// } + // set input data blob in dnn::net + //Mat temp =blobFromImage(preprocessed,1, Size(700, 700)); + //printf("%d %d %d ",temp.size[1],temp.size[2],temp.size[3]); + net_->setInput(blobFromImage(preprocessed,1, Size(700, 700)), "data"); + printf("Input layer"); + + + //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ; + Mat outputNet = this->net_->forward( );//"mbox_priorbox"); + printf("After forward"); + //outputNet = outputNet.reshape(1, 1); + this->outputGeometry_.height = outputNet.size[2]; + this->outputGeometry_.width = outputNet.size[3]; + this->outputChannelCount_ = outputNet.size[1]; + printf("%d %d %d ",outputNet.size[1],outputNet.size[2],outputNet.size[3]); + outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); + float*outputMatData=(float*)(outputMat.data); + float*outputNetData=(float*)(outputNet.data); + int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; + + memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); +// net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width); +// net_->Reshape(); +// float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); +// float* inputData=inputBuffer; + +// std::vector input_channels; +// Mat preprocessed; +// // if the image have multiple color channels the input layer should be populated accordingly +// for (int channel=0;channel < this->inputChannelCount_;channel++){ + +// cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); +// input_channels.push_back(netInputWraped); 
+// //input_data += width * height; +// inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); +// } +// this->preprocess(inputImage,preprocessed); +// split(preprocessed, input_channels); + +// //preprocessed.copyTo(netInputWraped); + + +// this->net_->Forward(); +// const float* outputNetData=net_->output_blobs()[0]->cpu_data(); +// // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); + + + + +// this->outputGeometry_.height = net_->output_blobs()[0]->height(); +// this->outputGeometry_.width = net_->output_blobs()[0]->width(); +// this->outputChannelCount_ = net_->output_blobs()[0]->channels(); +// int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; +// outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); +// float*outputMatData=(float*)(outputMat.data); + +// memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); + + + +#endif + } + + + +#ifdef HAVE_DNN + Ptr net_; +#endif + //Size inputGeometry_; + int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst + //int outputSize_; +public: + DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn): + minibatchSz_(dn.minibatchSz_){ + outputGeometry_=dn.outputGeometry_; + inputGeometry_=dn.inputGeometry_; + //Implemented to supress Visual Studio warning "assignment operator could not be generated" +#ifdef HAVE_DNN + this->net_=dn.net_; +#endif + } + DeepCNNTextDetectorDNNImpl& operator=(const DeepCNNTextDetectorDNNImpl &dn) + { +#ifdef HAVE_DNN + this->net_=dn.net_; +#endif + this->setPreprocessor(dn.preprocessor_); + this->inputGeometry_=dn.inputGeometry_; + this->inputChannelCount_=dn.inputChannelCount_; + this->outputChannelCount_ = dn.outputChannelCount_; + // this->minibatchSz_=dn.minibatchSz_; + //this->outputGeometry_=dn.outputSize_; + this->preprocessor_=dn.preprocessor_; + this->outputGeometry_=dn.outputGeometry_; + return *this; + //Implemented to 
supress Visual Studio warning "assignment operator could not be generated" + } + + DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) + :minibatchSz_(maxMinibatchSz) + { + + CV_Assert(this->minibatchSz_>0); + CV_Assert(fileExists(modelArchFilename)); + CV_Assert(fileExists(modelWeightsFilename)); + CV_Assert(!preprocessor.empty()); + this->setPreprocessor(preprocessor); +#ifdef HAVE_DNN + this->net_ = makePtr(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); + + if (this->net_.empty()) + { + std::cerr << "Can't load network by using the following files: " << std::endl; + std::cerr << "prototxt: " << modelArchFilename << std::endl; + std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; + //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl; + //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; + exit(-1); + } +// this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); +// CV_Assert(net_->num_inputs()==1); +// CV_Assert(net_->num_outputs()==1); +// CV_Assert(this->net_->input_blobs()[0]->channels()==1 +// ||this->net_->input_blobs()[0]->channels()==3); +// // this->channelCount_=this->net_->input_blobs()[0]->channels(); + + +// this->inputGeometry_.height = inputLayer->height(); +// this->inputGeometry_.width = inputLayer->width(); +// this->inputChannelCount_ = inputLayer->channels(); +// //this->inputGeometry_.batchSize =1; + +// inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width); +// net_->Reshape(); +// this->outputChannelCount_ = net_->output_blobs()[0]->channels(); +// //this->outputGeometry_.batchSize =1; +// this->outputGeometry_.height =net_->output_blobs()[0]->height(); +// this->outputGeometry_.width = net_->output_blobs()[0]->width(); + this->inputGeometry_.height =700; + this->inputGeometry_.width = 700 
;//inputLayer->width(); + this->inputChannelCount_ = 3 ;//inputLayer->channels(); + +#else + CV_Error(Error::StsError,"DNN module not available during compilation!"); +#endif + } + + + void detect(InputArray image, OutputArray Bbox_prob) + { + Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); + Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed + Mat outputMat = Bbox_prob.getMat(); + printf("calling"); + process_(image.getMat(),outputMat); + //copy back to outputArray + outputMat.copyTo(Bbox_prob); + } + + Size getOutputGeometry() + { + return this->outputGeometry_; + } + Size getinputGeometry() + { + return this->inputGeometry_; + } + + int getMinibatchSize() + { + return this->minibatchSz_; + } + + int getBackend() + { + return OCR_HOLISTIC_BACKEND_DNN; + } + void setPreprocessor(Ptr ptr) + { + CV_Assert(!ptr.empty()); + preprocessor_=ptr; + } + + Ptr getPreprocessor() + { + return preprocessor_; + } +}; + Ptr DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz,int backEnd) { if(preprocessor.empty()) @@ -220,13 +440,29 @@ Ptr DeepCNNTextDetector::create(String archFilename,String preprocessor->set_mean(textbox_mean); } switch(backEnd){ + case OCR_HOLISTIC_BACKEND_DEFAULT: + +#ifdef HAVE_CAFFE + return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + +#elif defined(HAVE_DNN) + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); +#else + CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); + return Ptr(); +#endif case OCR_HOLISTIC_BACKEND_CAFFE: return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); break; + + case OCR_HOLISTIC_BACKEND_DNN: + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + break; + case OCR_HOLISTIC_BACKEND_NONE: default: - 
CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); return Ptr(); break; } @@ -248,12 +484,27 @@ Ptr DeepCNNTextDetector::createTextBoxNet(String archFilena textbox_mean.at(0,2)=123; preprocessor->set_mean(textbox_mean); switch(backEnd){ + case OCR_HOLISTIC_BACKEND_DEFAULT: + +#ifdef HAVE_CAFFE + return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); + +#elif defined(HAVE_DNN) + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100)); +#else + CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); + return Ptr(); +#endif + break; case OCR_HOLISTIC_BACKEND_CAFFE: return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); break; + case OCR_HOLISTIC_BACKEND_DNN: + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100)); + break; case OCR_HOLISTIC_BACKEND_NONE: default: - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); + CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); return Ptr(); break; } From c697e41b8d8415084971e5e8dc1f73d2867eab37 Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Mon, 28 Aug 2017 19:25:58 +0200 Subject: [PATCH 18/31] added calculation of output size --- modules/text/include/opencv2/text/ocr.hpp | 24 +++++ .../include/opencv2/text/textDetector.hpp | 4 +- modules/text/samples/textbox_demo.cpp | 10 +- modules/text/src/ocr_holistic.cpp | 98 +++++++++-------- modules/text/src/text_detector.cpp | 6 +- modules/text/src/text_detectorCNN.cpp | 101 ++++-------------- 6 files changed, 107 insertions(+), 136 deletions(-) diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 3c739093559..14dfc092456 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ 
b/modules/text/include/opencv2/text/ocr.hpp @@ -861,6 +861,15 @@ class CV_EXPORTS_W DeepCNN:public TextImageClassifier }; namespace cnn_config{ + +/** @brief runtime backend information + * + * this function finds the status of backends compiled with this module + * + * @return a list of backends (caffe,opencv-dnn etc.) + * */ +CV_EXPORTS_W std::vector getAvailableBackends(); + namespace caffe_backend{ /** @brief Prompts Caffe on the computation device beeing used @@ -897,6 +906,21 @@ CV_EXPORTS_W void setCaffeGpuMode(bool useGpu); CV_EXPORTS_W bool getCaffeAvailable(); }//caffe +namespace dnn_backend { + +/** @brief Provides runtime information on whether DNN module was compiled in. + * + * The text module API is the same regardless of whether DNN module was available or not + * During compilation. When methods that require backend are invocked while no backend support + * is compiled, exceptions are thrown. This method allows to test whether the + * text module was built with dnn_backend during runtime. + * + * @return true if opencv_dnn support for the the text module was provided during compilation, + * false if opencv_dnn was unavailable. + */ +CV_EXPORTS_W bool getDNNAvailable(); + +}//dnn_backend }//cnn_config /** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting. diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index ad1b53deed3..eda74801449 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -56,7 +56,7 @@ namespace cv namespace text { -//! @addtogroup text_recognize +//! @addtogroup text_detect //! @{ @@ -263,7 +263,7 @@ class CV_EXPORTS_W textDetector : public BaseDetector }; - +//! 
@} }//namespace text }//namespace cv diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index 75a18a31552..b76658e1b7a 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -61,6 +61,12 @@ int main(int argc, const char * argv[]){ std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"; //exit(1); } + std::vector backends=cv::text::cnn_config::getAvailableBackends(); + std::cout << "The Following backends are available" << "\n"; + for (int i=0;i cnn=cv::text::DeepCNN::createDictNet( - "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel"); + "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN); cv::Ptr wordSpotter= cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt"); @@ -130,7 +136,7 @@ int main(int argc, const char * argv[]){ cv::Point tl_ = bbox.at(i).tl(); cv::Point br_ = bbox.at(i).br(); - out<minibatchSz_); CV_Assert(outputMat.isContinuous()); + #ifdef HAVE_CAFFE net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); net_->Reshape(); @@ -450,16 +452,19 @@ class DeepCNNCaffeImpl: public DeepCNN{ input_channels.push_back(netInputWraped); //input_data += width * height; inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); + } this->preprocess(inputImageList[imgNum],preprocessed); split(preprocessed, input_channels); + } this->net_->ForwardPrefilled(); const float* outputNetData=net_->output_blobs()[0]->cpu_data(); this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; + //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width); float*outputMatData=(float*)(outputMat.data); 
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size()); @@ -470,9 +475,10 @@ class DeepCNNCaffeImpl: public DeepCNN{ #ifdef HAVE_CAFFE Ptr > net_; #endif - //Size inputGeometry_; + //Size inputGeometry_;//=Size(100,32); int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst int outputSize_; + //Size outputGeometry_; public: DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn): minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ @@ -608,7 +614,7 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ preProcessedImList.push_back(preprocessed); } // set input data blob in dnn::net - net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data"); + net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data"); float*outputMatData=(float*)(outputMat.data); //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ; @@ -625,9 +631,16 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ #ifdef HAVE_DNN Ptr net_; #endif - //Size inputGeometry_; + // hard coding input image size. anything in DNN library to get that from prototxt?? + // Size inputGeometry_;//=Size(100,32); int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst int outputSize_; + //Size outputGeometry_;//= Size(1,1); + //int channelCount_; + // int inputChannel_ ;//=1; + const int _inputHeight =32; + const int _inputWidth =100; + const int _inputChannel =1; public: DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ @@ -678,33 +691,17 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; exit(-1); } -// find a wa to check the followings in cv::dnn ??? 
-// CV_Assert(net_->num_inputs()==1); -// CV_Assert(net_->num_outputs()==1); -// CV_Assert(this->net_->input_blobs()[0]->channels()==1 -// ||this->net_->input_blobs()[0]->channels()==3); -// this->channelCount_=this->net_->input_blobs()[0]->channels(); - - - //this->net_->CopyTrainedLayersFrom(modelWeightsFilename); - //caffe::Blob* inputLayer = this->net_->input_blobs()[0]; - //inputLayerId = net_->getLayerId('data'); - - // inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape, - // inputLayerId, - // std::vector* inLayerShapes, - // std::vector* outLayerShapes) const; - // should not be hard coded ideally - - this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height()); - this->channelCount_ = 1;//inputLayer->channels(); + this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height()); + this->channelCount_ = _inputChannel;//inputLayer->channels(); //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); - //net_->Reshape(); - this->outputSize_=88172 ;//net_->output_blobs()[0]->channels(); - this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); + Ptr< Layer > outLayer= net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2])); + //std::vector blobs = outLayer->blobs; + + this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels(); + //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); @@ -732,7 +729,7 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); Mat outputMat = classProbabilities.getMat(); - printf("ekhane"); + for(size_t 
imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); @@ -832,6 +829,22 @@ Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,i } namespace cnn_config{ +std::vector getAvailableBackends() +{ + std::vector backends; + +#ifdef HAVE_CAFFE + backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // dnn backend opencv_dnn + +#endif +#ifdef HAVE_DNN + backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn based backend" +#endif + return backends; + + +} + namespace caffe_backend{ #ifdef HAVE_CAFFE @@ -856,7 +869,7 @@ bool getCaffeAvailable() { return true; } -#elif defined(HAVE_DNN) +#else bool getCaffeGpuMode() { @@ -873,32 +886,23 @@ void setCaffeGpuMode(bool useGpu) bool getCaffeAvailable(){ return 0; } -bool getDNNAvailable(){ - return true; -} +#endif -#else +}//namespace caffe +namespace dnn_backend{ +#ifdef HAVE_DNN -bool getCaffeGpuMode() -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - return 0; -} -void setCaffeGpuMode(bool useGpu) -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - CV_Assert(useGpu==1);//Compilation directives force +bool getDNNAvailable(){ + return true; } - -bool getCaffeAvailable(){ +#else +bool getDNNAvailable(){ return 0; } - #endif - -}//namespace caffe +}//namspace dnn_backend }//namespace cnn_config class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ @@ -931,6 +935,7 @@ class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ getOutputs(buffer,nbOutputs,tmp); classNum=tmp[0].wordIdx; confidence=tmp[0].probabillity; + } }; protected: @@ -972,6 +977,7 @@ class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ { Mat netOutput; this->classifier_->classifyBatch(inputImageList,netOutput); + for(int k=0;k -#ifdef HAVE_CAFFE -#include "caffe/caffe.hpp" -#endif +//#ifdef HAVE_CAFFE +//#include "caffe/caffe.hpp" +//#endif namespace cv { namespace text { diff --git a/modules/text/src/text_detectorCNN.cpp 
b/modules/text/src/text_detectorCNN.cpp index a2c583c7f10..90d6fd9b8ee 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -225,75 +225,25 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ #ifdef HAVE_DNN - //std::vector preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class? - Mat preprocessed; this->preprocess(inputImage,preprocessed); - printf("After preprocess"); - // preprocesses each image in the inputImageList and push to preprocessedImList -// for(size_t imgNum=0;imgNumpreprocess(inputImageList[imgNum],preprocessed); -// preProcessedImList.push_back(preprocessed); -// } - // set input data blob in dnn::net - //Mat temp =blobFromImage(preprocessed,1, Size(700, 700)); - //printf("%d %d %d ",temp.size[1],temp.size[2],temp.size[3]); - net_->setInput(blobFromImage(preprocessed,1, Size(700, 700)), "data"); - printf("Input layer"); - - - //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ; - Mat outputNet = this->net_->forward( );//"mbox_priorbox"); - printf("After forward"); - //outputNet = outputNet.reshape(1, 1); + + net_->setInput(blobFromImage(preprocessed,1, this->inputGeometry_), "data"); + + Mat outputNet = this->net_->forward( ); + this->outputGeometry_.height = outputNet.size[2]; this->outputGeometry_.width = outputNet.size[3]; this->outputChannelCount_ = outputNet.size[1]; - printf("%d %d %d ",outputNet.size[1],outputNet.size[2],outputNet.size[3]); + outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); float*outputMatData=(float*)(outputMat.data); float*outputNetData=(float*)(outputNet.data); int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); -// net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width); -// 
net_->Reshape(); -// float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); -// float* inputData=inputBuffer; - -// std::vector input_channels; -// Mat preprocessed; -// // if the image have multiple color channels the input layer should be populated accordingly -// for (int channel=0;channel < this->inputChannelCount_;channel++){ - -// cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); -// input_channels.push_back(netInputWraped); -// //input_data += width * height; -// inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); -// } -// this->preprocess(inputImage,preprocessed); -// split(preprocessed, input_channels); - -// //preprocessed.copyTo(netInputWraped); -// this->net_->Forward(); -// const float* outputNetData=net_->output_blobs()[0]->cpu_data(); -// // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); - - - - -// this->outputGeometry_.height = net_->output_blobs()[0]->height(); -// this->outputGeometry_.width = net_->output_blobs()[0]->width(); -// this->outputChannelCount_ = net_->output_blobs()[0]->channels(); -// int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; -// outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); -// float*outputMatData=(float*)(outputMat.data); - -// memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); - #endif @@ -307,6 +257,9 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ //Size inputGeometry_; int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst //int outputSize_; + const int _inputHeight =700; + const int _inputWidth =700; + const int _inputChannel =3; public: DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn): minibatchSz_(dn.minibatchSz_){ @@ -355,28 +308,10 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ //std::cerr << 
"http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; exit(-1); } -// this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); -// CV_Assert(net_->num_inputs()==1); -// CV_Assert(net_->num_outputs()==1); -// CV_Assert(this->net_->input_blobs()[0]->channels()==1 -// ||this->net_->input_blobs()[0]->channels()==3); -// // this->channelCount_=this->net_->input_blobs()[0]->channels(); - - -// this->inputGeometry_.height = inputLayer->height(); -// this->inputGeometry_.width = inputLayer->width(); -// this->inputChannelCount_ = inputLayer->channels(); -// //this->inputGeometry_.batchSize =1; - -// inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width); -// net_->Reshape(); -// this->outputChannelCount_ = net_->output_blobs()[0]->channels(); -// //this->outputGeometry_.batchSize =1; -// this->outputGeometry_.height =net_->output_blobs()[0]->height(); -// this->outputGeometry_.width = net_->output_blobs()[0]->width(); - this->inputGeometry_.height =700; - this->inputGeometry_.width = 700 ;//inputLayer->width(); - this->inputChannelCount_ = 3 ;//inputLayer->channels(); + + this->inputGeometry_.height =_inputHeight; + this->inputGeometry_.width = _inputWidth ;//inputLayer->width(); + this->inputChannelCount_ = _inputChannel ;//inputLayer->channels(); #else CV_Error(Error::StsError,"DNN module not available during compilation!"); @@ -389,7 +324,7 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed Mat outputMat = Bbox_prob.getMat(); - printf("calling"); + process_(image.getMat(),outputMat); //copy back to outputArray outputMat.copyTo(Bbox_prob); @@ -487,20 +422,20 @@ Ptr DeepCNNTextDetector::createTextBoxNet(String archFilena case OCR_HOLISTIC_BACKEND_DEFAULT: #ifdef HAVE_CAFFE - return Ptr(new 
DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); #elif defined(HAVE_DNN) - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1)); #else CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); return Ptr(); #endif break; case OCR_HOLISTIC_BACKEND_CAFFE: - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); break; case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1)); break; case OCR_HOLISTIC_BACKEND_NONE: default: From dc48968f1cdcce55643b1df08b6b2d878f18978b Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Tue, 5 Sep 2017 06:16:50 +0200 Subject: [PATCH 19/31] removed blanks, fixed Cmake issue --- modules/text/CMakeLists.txt | 5 ++ modules/text/README.md | 84 +++-------------------- modules/text/include/opencv2/text/ocr.hpp | 1 - modules/text/src/precomp.hpp | 2 - modules/text/src/text_detectorCNN.cpp | 5 -- modules/text/text_config.hpp.in | 9 --- 6 files changed, 16 insertions(+), 90 deletions(-) diff --git a/modules/text/CMakeLists.txt b/modules/text/CMakeLists.txt index f9649ca336f..18173db830b 100644 --- a/modules/text/CMakeLists.txt +++ b/modules/text/CMakeLists.txt @@ -1,5 +1,10 @@ set(the_description "Text Detection and Recognition") +if(POLICY CMP0023) + message(STATUS "Explicitly setting policy CMP0023 to OLD") + cmake_policy(SET CMP0023 OLD) +endif(POLICY CMP0023) + # Using cmake scripts and modules list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) diff --git 
a/modules/text/README.md b/modules/text/README.md index 2caf58a1e17..fd33980e80e 100644 --- a/modules/text/README.md +++ b/modules/text/README.md @@ -49,20 +49,22 @@ Notes 3. You are encouraged to search the Net for some better pre-trained classifiers, as well as classifiers for other languages. -Word spotting CNN +Text Detection CNN ================= Intro ----- -A word spotting CNN is a CNN that takes an image assumed to contain a single word and provides a probabillity over a given vocabulary. -Although other backends will be supported, for the moment only the Caffe backend is supported. +The text module now have a text detection and recognition using deep CNN. The text detector deep CNN that takes an image which may contain multiple words. This outputs a list of Rects with bounding boxes and probability of text there. The text recognizer provides a probabillity over a given vocabulary for each of these rects. + +Two backends are supported 1) caffe 2) opencv-dnn Instalation of Caffe backend ---------------------------- +* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below The caffe wrapping backend has the requirements caffe does. * Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises. The simplest solution is to build caffe without support for OpenCV. 
@@ -77,10 +79,8 @@ Sample script for building Caffe SRCROOT="${HOME}/caffe_inst/" mkdir -p "$SRCROOT" cd "$SRCROOT" -git clone https://github.com/BVLC/caffe.git -cd caffe -git checkout 91b09280f5233cafc62954c98ce8bc4c204e7475 -git branch 91b09280f5233cafc62954c98ce8bc4c204e7475 +git clone https://github.com/sghoshcvc/TextBoxes.git +cd TextBoxes cat Makefile.config.example > Makefile.config echo 'USE_OPENCV := 0' >> Makefile.config echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config @@ -115,77 +115,15 @@ make distribute cd $OPENCV_BUILD_DIR #You must set this CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04 -cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./ +cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="$OPENCV_CONTRIB/modules" ./ ``` - -Text Detection CNN -================= - -Intro ------ - -A text detection CNN is a CNN that takes an image which may contain multiple words. This outputs a list of Rects with bounding boxes and probability of text there. 
-Although other backends will be supported, for the moment only the Caffe backend is supported. - - - +where $OPECV_CONTRIB is the root directory containing opencv_contrib module Instalation of Caffe backend ---------------------------- -* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below -The caffe wrapping backend has the requirements caffe does. -* Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises. -The simplest solution is to build caffe without support for OpenCV. -* Only the OS supported by Caffe are supported by the backend. -The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. -Other UNIX systems including OSX should be easy to adapt. -Sample script for building Caffe +Use of opencv-dnn does not need any additional library. -```bash -#!/bin/bash -SRCROOT="${HOME}/caffe_inst/" -mkdir -p "$SRCROOT" -cd "$SRCROOT" -git clone https://github.com/sghoshcvc/TextBoxes.git -cd TextBoxes -cat Makefile.config.example > Makefile.config -echo 'USE_OPENCV := 0' >> Makefile.config -echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config -echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config - - -echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 -+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 -@@ -234,6 +234,7 @@ - - template - friend class Net; -+ virtual ~Callback(){} - }; - const vector& before_forward() const { return before_forward_; } - void add_before_forward(Callback* value) { -">/tmp/cleanup_caffe.diff - -patch < /tmp/cleanup_caffe.diff - - -make -j 6 - -make pycaffe - -make distribute -``` - - -```bash -#!/bin/bash -cd $OPENCV_BUILD_DIR #You must set this -CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04 - -cmake 
-DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./ - - -``` +The recent opencv-3.3.0 needs to be build with extra modules to use text module. diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 14dfc092456..b77a3e1321b 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -657,7 +657,6 @@ CV_EXPORTS_W Ptr loadOCRBeamSearchClas //Classifiers should provide diferent backends -//For the moment only caffe is implemeted enum{ OCR_HOLISTIC_BACKEND_NONE, //No back end diff --git a/modules/text/src/precomp.hpp b/modules/text/src/precomp.hpp index c7371db1e79..72a23a9b34a 100644 --- a/modules/text/src/precomp.hpp +++ b/modules/text/src/precomp.hpp @@ -45,8 +45,6 @@ #include "opencv2/text.hpp" -//#include "text_config.hpp" - #ifdef HAVE_TESSERACT #include #include diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 90d6fd9b8ee..a8d04db3722 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -91,7 +91,6 @@ class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ } - #ifdef HAVE_CAFFE Ptr > net_; #endif @@ -160,10 +159,6 @@ class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ this->outputGeometry_.height =net_->output_blobs()[0]->height(); this->outputGeometry_.width = net_->output_blobs()[0]->width(); - - - - #else CV_Error(Error::StsError,"Caffe not available during compilation!"); #endif 
diff --git a/modules/text/text_config.hpp.in b/modules/text/text_config.hpp.in index 71b32993acf..81e624bab37 100644 --- a/modules/text/text_config.hpp.in +++ b/modules/text/text_config.hpp.in @@ -1,13 +1,4 @@ #ifndef __OPENCV_TEXT_CONFIG_HPP__ #define __OPENCV_TEXT_CONFIG_HPP__ -// HAVE QT5 -//#cmakedefine HAVE_QT5GUI - -// HAVE CAFFE -//#cmakedefine HAVE_CAFFE - -// HAVE OCR Tesseract -//#cmakedefine HAVE_TESSERACT - #endif From af536b13530b34b552592e5b9cb31bdd8a941157 Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Tue, 5 Sep 2017 07:02:34 +0200 Subject: [PATCH 20/31] seperate image pre-processing from ocr code --- modules/text/src/image_preprocessor.cpp | 387 ++++++++++++++++++++++++ modules/text/src/ocr_holistic.cpp | 367 ---------------------- 2 files changed, 387 insertions(+), 367 deletions(-) create mode 100644 modules/text/src/image_preprocessor.cpp diff --git a/modules/text/src/image_preprocessor.cpp b/modules/text/src/image_preprocessor.cpp new file mode 100644 index 00000000000..3a65a210863 --- /dev/null +++ b/modules/text/src/image_preprocessor.cpp @@ -0,0 +1,387 @@ +#include "precomp.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/core.hpp" + + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cv { namespace text { +//************************************************************************************ +//****************** ImagePreprocessor ******************************************* +//************************************************************************************ + +void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){ + Mat inpImg=input.getMat(); + Mat outImg; + this->preprocess_(inpImg,outImg,sz,outputChannels); + outImg.copyTo(output); +} +void ImagePreprocessor::set_mean(Mat mean){ + + + this->set_mean_(mean); + +} + + + +class ResizerPreprocessor: public ImagePreprocessor{ +protected: + void 
preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of channel and depth conversions in ImageProcessor class + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1){ + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC1); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC3); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC1); + } + }else + { + if(input.depth()==CV_8U){ + input.convertTo(output, CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC3); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + } + //void set_mean_(Mat m){} +public: + ResizerPreprocessor(){} + ~ResizerPreprocessor(){} +}; + +class StandarizerPreprocessor: public ImagePreprocessor{ +protected: + double sigma_; + //void set_mean_(Mat M){} + + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + + //TODO put all the logic of channel and depth conversions in ImageProcessor class + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1) + { + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + 
tmpInput.convertTo(output,CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC1); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC3); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC1); + } + }else + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC3); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + + Scalar mean,dev; + meanStdDev(output,mean,dev); + subtract(output,mean[0],output); + divide(output,(dev[0]/sigma_),output); + } +public: + StandarizerPreprocessor(double sigma):sigma_(sigma){} + ~StandarizerPreprocessor(){} + +}; + +class customPreprocessor:public ImagePreprocessor{ +protected: + + double rawval_; + Mat mean_; + String channel_order_; + + void set_mean_(Mat imMean_){ + + imMean_.copyTo(this->mean_); + + + } + + void set_raw_scale(int rawval){ + rawval_ = rawval; + + } + void set_channels(String channel_order){ + channel_order_=channel_order; + } + + + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of channel and depth conversions in ImageProcessor class + + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1) + { + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + if (rawval_ == 1) + tmpInput.convertTo(output,CV_32FC3,1/255.0); + else + 
tmpInput.convertTo(output,CV_32FC1); + }else + {//Assuming values are at the desired [0,1] range + if (rawval_ ==1) + tmpInput.convertTo(output, CV_32FC1); + else + tmpInput.convertTo(output, CV_32FC1,rawval_); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + if (rawval_ == 1) + tmpInput.convertTo(output,CV_32FC3,1/255.0); + else + tmpInput.convertTo(output,CV_32FC1); + }else + {//Assuming values are at the desired [0,1] range + if (rawval_ ==1) + tmpInput.convertTo(output, CV_32FC1); + else + tmpInput.convertTo(output, CV_32FC1,rawval_); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + if (rawval_ == 1) + input.convertTo(output,CV_32FC1,1/255.0); + else + input.convertTo(output,CV_32FC1); + }else + {//Assuming values are at the desired [0,1] range + if (rawval_ ==1) + input.convertTo(output, CV_32FC1); + else + input.convertTo(output, CV_32FC1,rawval_); + } + }else + { + if(input.depth()==CV_8U) + { + if (rawval_ == 1) + input.convertTo(output,CV_32FC3,1/255.0); + else + input.convertTo(output,CV_32FC3); + }else + {//Assuming values are at the desired [0,1] range + if (rawval_ ==1) + input.convertTo(output, CV_32FC3); + else + input.convertTo(output, CV_32FC3,rawval_); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + + if (!this->mean_.empty()){ + + Scalar mean_s(this->mean_.at(0,0),this->mean_.at(0,1),this->mean_.at(0,2)); + subtract(output,mean_s,output); + } + else{ + Scalar mean_s; + mean_s = mean(output); + subtract(output,mean_s,output); + } + + } + +public: + customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){} + ~customPreprocessor(){} + +}; + +class MeanSubtractorPreprocessor: public ImagePreprocessor{ +protected: + Mat mean_; + //void set_mean_(Mat m){} + void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ + //TODO put all the logic of 
channel and depth conversions in ImageProcessor class + CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height); + CV_Assert(outputChannels==1 || outputChannels==3); + CV_Assert(input.channels()==1 || input.channels()==3); + if(input.channels()!=outputChannels) + { + Mat tmpInput; + if(outputChannels==1) + { + cvtColor(input,tmpInput,COLOR_BGR2GRAY); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC1); + } + }else + { + cvtColor(input,tmpInput,COLOR_GRAY2BGR); + if(input.depth()==CV_8U) + { + tmpInput.convertTo(output,CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + tmpInput.convertTo(output, CV_32FC3); + } + } + }else + { + if(input.channels()==1) + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC1,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC1); + } + }else + { + if(input.depth()==CV_8U) + { + input.convertTo(output, CV_32FC3,1/255.0); + }else + {//Assuming values are at the desired [0,1] range + input.convertTo(output, CV_32FC3); + } + } + } + if(outputSize.width!=0 && outputSize.height!=0) + { + resize(output,output,outputSize); + } + subtract(output,this->mean_,output); + } +public: + MeanSubtractorPreprocessor(Mat mean) + { + mean.copyTo(this->mean_); + } + + ~MeanSubtractorPreprocessor(){} +}; + + + +Ptr ImagePreprocessor::createResizer() +{ + return Ptr(new ResizerPreprocessor); +} + +Ptr ImagePreprocessor::createImageStandarizer(double sigma) +{ + return Ptr(new StandarizerPreprocessor(sigma)); +} +Ptr ImagePreprocessor::createImageCustomPreprocessor(double rawval,String channel_order) +{ + + return Ptr(new customPreprocessor(rawval,channel_order)); +} + +Ptr ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg) +{ + Mat tmp=meanImg.getMat(); + return Ptr(new 
MeanSubtractorPreprocessor(tmp)); +} +} +} diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp index f41fb7eb1c7..157637c2b2d 100644 --- a/modules/text/src/ocr_holistic.cpp +++ b/modules/text/src/ocr_holistic.cpp @@ -36,374 +36,7 @@ inline bool fileExists (String filename) { return f.good(); } -//************************************************************************************ -//****************** ImagePreprocessor ******************************************* -//************************************************************************************ - -void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){ - Mat inpImg=input.getMat(); - Mat outImg; - this->preprocess_(inpImg,outImg,sz,outputChannels); - outImg.copyTo(output); -} -void ImagePreprocessor::set_mean(Mat mean){ - - - this->set_mean_(mean); - -} - - - -class ResizerPreprocessor: public ImagePreprocessor{ -protected: - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1){ - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, 
CV_32FC1); - } - }else - { - if(input.depth()==CV_8U){ - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - } - //void set_mean_(Mat m){} -public: - ResizerPreprocessor(){} - ~ResizerPreprocessor(){} -}; - -class StandarizerPreprocessor: public ImagePreprocessor{ -protected: - double sigma_; - //void set_mean_(Mat M){} - - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - - Scalar mean,dev; - meanStdDev(output,mean,dev); - subtract(output,mean[0],output); - divide(output,(dev[0]/sigma_),output); - } 
-public: - StandarizerPreprocessor(double sigma):sigma_(sigma){} - ~StandarizerPreprocessor(){} - -}; - -class customPreprocessor:public ImagePreprocessor{ -protected: - - double rawval_; - Mat mean_; - String channel_order_; - - void set_mean_(Mat imMean_){ - - imMean_.copyTo(this->mean_); - - - } - - void set_raw_scale(int rawval){ - rawval_ = rawval; - - } - void set_channels(String channel_order){ - channel_order_=channel_order; - } - - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - tmpInput.convertTo(output,CV_32FC3,1/255.0); - else - tmpInput.convertTo(output,CV_32FC1); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - tmpInput.convertTo(output, CV_32FC1); - else - tmpInput.convertTo(output, CV_32FC1,rawval_); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - tmpInput.convertTo(output,CV_32FC3,1/255.0); - else - tmpInput.convertTo(output,CV_32FC1); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - tmpInput.convertTo(output, CV_32FC1); - else - tmpInput.convertTo(output, CV_32FC1,rawval_); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - input.convertTo(output,CV_32FC1,1/255.0); - else - input.convertTo(output,CV_32FC1); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - input.convertTo(output, CV_32FC1); - else - input.convertTo(output, CV_32FC1,rawval_); - } - }else - { - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - 
input.convertTo(output,CV_32FC3,1/255.0); - else - input.convertTo(output,CV_32FC3); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - input.convertTo(output, CV_32FC3); - else - input.convertTo(output, CV_32FC3,rawval_); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - - if (!this->mean_.empty()){ - - Scalar mean_s(this->mean_.at(0,0),this->mean_.at(0,1),this->mean_.at(0,2)); - subtract(output,mean_s,output); - } - else{ - Scalar mean_s; - mean_s = mean(output); - subtract(output,mean_s,output); - } - - } - -public: - customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){} - ~customPreprocessor(){} - -}; - -class MeanSubtractorPreprocessor: public ImagePreprocessor{ -protected: - Mat mean_; - //void set_mean_(Mat m){} - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height); - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, 
CV_32FC1); - } - }else - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - subtract(output,this->mean_,output); - } -public: - MeanSubtractorPreprocessor(Mat mean) - { - mean.copyTo(this->mean_); - } - - ~MeanSubtractorPreprocessor(){} -}; - - - - - -Ptr ImagePreprocessor::createResizer() -{ - return Ptr(new ResizerPreprocessor); -} - -Ptr ImagePreprocessor::createImageStandarizer(double sigma) -{ - return Ptr(new StandarizerPreprocessor(sigma)); -} -Ptr ImagePreprocessor::createImageCustomPreprocessor(double rawval,String channel_order) -{ - - return Ptr(new customPreprocessor(rawval,channel_order)); -} - -Ptr ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg) -{ - Mat tmp=meanImg.getMat(); - return Ptr(new MeanSubtractorPreprocessor(tmp)); -} //************************************************************************************ //****************** TextImageClassifier ***************************************** From efc864c5fe68bc526aa57e9245af3489c3358c2a Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Fri, 15 Sep 2017 21:00:26 +0200 Subject: [PATCH 21/31] removed hard coding height and width --- modules/text/CMakeLists.txt | 6 ++++-- modules/text/src/ocr_holistic.cpp | 14 +++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/modules/text/CMakeLists.txt b/modules/text/CMakeLists.txt index 18173db830b..b58fd41cf1d 100644 --- a/modules/text/CMakeLists.txt +++ b/modules/text/CMakeLists.txt @@ -59,8 +59,10 @@ if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF) list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) include_directories(SYSTEM ${Boost_INCLUDE_DIR}) - include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ 
usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ ) - link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64) + include_directories(SYSTEM ${CUDA_INCLUDE_DIR}) + link_directories(SYSTEM ${CUDA_LIBS}) + # include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ ) + #link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64) list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES}) add_definitions(-DHAVE_CAFFE) diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp index 157637c2b2d..cd24f3a9616 100644 --- a/modules/text/src/ocr_holistic.cpp +++ b/modules/text/src/ocr_holistic.cpp @@ -271,9 +271,9 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ //Size outputGeometry_;//= Size(1,1); //int channelCount_; // int inputChannel_ ;//=1; - const int _inputHeight =32; - const int _inputWidth =100; - const int _inputChannel =1; + //const int _inputHeight =32; + //const int _inputWidth =100; + //const int _inputChannel =1; public: DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ @@ -300,8 +300,8 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ //Implemented to supress Visual Studio warning "assignment operator could not be generated" } - DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) - :minibatchSz_(maxMinibatchSz) + DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int 
maxMinibatchSz,int inputWidth =100,int inputHeight = 32) + :minibatchSz_(maxMinibatchSz),_inputWidth(inputWidth),_inputHeight(inputHeight) { CV_Assert(this->minibatchSz_>0); @@ -612,13 +612,13 @@ class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ this->classifier_->classifyBatch(inputImageList,netOutput); for(int k=0;kclassifier_->getOutputSize(),classNum,confidence); transcriptionVec.push_back(this->labels_[classNum]); confidenceVec.push_back(confidence); - } + }https://www.google.es/?gfe_rd=cr&dcr=0&ei=4fq7We8Bk9jyB8zPp5AL } From 887e6e5ed6c8967a3ac2a61d7e106022ba99fcf4 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Sun, 17 Sep 2017 20:57:52 +0200 Subject: [PATCH 22/31] removed hard codinginput parameters --- modules/text/src/ocr_holistic.cpp | 14 +++++++------- modules/text/src/text_detectorCNN.cpp | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp index cd24f3a9616..8e0bae0073e 100644 --- a/modules/text/src/ocr_holistic.cpp +++ b/modules/text/src/ocr_holistic.cpp @@ -271,9 +271,9 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ //Size outputGeometry_;//= Size(1,1); //int channelCount_; // int inputChannel_ ;//=1; - //const int _inputHeight =32; - //const int _inputWidth =100; - //const int _inputChannel =1; + int _inputHeight; + int _inputWidth ; + int _inputChannel ; public: DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ @@ -300,8 +300,8 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ //Implemented to supress Visual Studio warning "assignment operator could not be generated" } - DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz,int inputWidth =100,int inputHeight = 32) - :minibatchSz_(maxMinibatchSz),_inputWidth(inputWidth),_inputHeight(inputHeight) + DeepCNNOpenCvDNNImpl(String modelArchFilename, String 
modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz,int inputWidth =100,int inputHeight = 32,int inputChannel =1) + :minibatchSz_(maxMinibatchSz),_inputWidth(inputWidth),_inputHeight(inputHeight),_inputChannel(inputChannel) { CV_Assert(this->minibatchSz_>0); @@ -612,13 +612,13 @@ class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ this->classifier_->classifyBatch(inputImageList,netOutput); for(int k=0;kclassifier_->getOutputSize(),classNum,confidence); transcriptionVec.push_back(this->labels_[classNum]); confidenceVec.push_back(confidence); - }https://www.google.es/?gfe_rd=cr&dcr=0&ei=4fq7We8Bk9jyB8zPp5AL + } } diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index a8d04db3722..9b2e61ac6f4 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -252,9 +252,9 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ //Size inputGeometry_; int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst //int outputSize_; - const int _inputHeight =700; - const int _inputWidth =700; - const int _inputChannel =3; + int inputHeight_; + int inputWidth_; + int inputChannel_; public: DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn): minibatchSz_(dn.minibatchSz_){ @@ -282,8 +282,8 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ //Implemented to supress Visual Studio warning "assignment operator could not be generated" } - DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) - :minibatchSz_(maxMinibatchSz) + DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz,int inputHeight=700,int inputWidth =700,int inputChannel =3) + :minibatchSz_(maxMinibatchSz),inputHeight_(inputHeight),inputWidth_(inputWidth),inputChannel_(inputChannel) { CV_Assert(this->minibatchSz_>0); @@ -304,9 
+304,9 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ exit(-1); } - this->inputGeometry_.height =_inputHeight; - this->inputGeometry_.width = _inputWidth ;//inputLayer->width(); - this->inputChannelCount_ = _inputChannel ;//inputLayer->channels(); + this->inputGeometry_.height =inputHeight_; + this->inputGeometry_.width = inputWidth_ ;//inputLayer->width(); + this->inputChannelCount_ = inputChannel_ ;//inputLayer->channels(); #else CV_Error(Error::StsError,"DNN module not available during compilation!"); From 878258bc13f724071968e3a6cbb89d1c6fe63b7f Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Sun, 17 Sep 2017 21:56:08 +0200 Subject: [PATCH 23/31] modified initializers --- modules/text/src/ocr_holistic.cpp | 22 +++++++++++----------- modules/text/src/text_detectorCNN.cpp | 8 ++++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp index 8e0bae0073e..035f104f28a 100644 --- a/modules/text/src/ocr_holistic.cpp +++ b/modules/text/src/ocr_holistic.cpp @@ -271,9 +271,9 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ //Size outputGeometry_;//= Size(1,1); //int channelCount_; // int inputChannel_ ;//=1; - int _inputHeight; - int _inputWidth ; - int _inputChannel ; + // int _inputHeight; + //int _inputWidth ; + //int _inputChannel ; public: DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ @@ -300,8 +300,8 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ //Implemented to supress Visual Studio warning "assignment operator could not be generated" } - DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz,int inputWidth =100,int inputHeight = 32,int inputChannel =1) - :minibatchSz_(maxMinibatchSz),_inputWidth(inputWidth),_inputHeight(inputHeight),_inputChannel(inputChannel) + DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr 
preprocessor, int maxMinibatchSz,int inputWidth ,int inputHeight ,int inputChannel ) + :minibatchSz_(maxMinibatchSz) { CV_Assert(this->minibatchSz_>0); @@ -326,8 +326,8 @@ class DeepCNNOpenCvDNNImpl: public DeepCNN{ } - this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height()); - this->channelCount_ = _inputChannel;//inputLayer->channels(); + this->inputGeometry_=Size(inputWidth,inputHeight);// Size(inputLayer->width(), inputLayer->height()); + this->channelCount_ = inputChannel;//inputLayer->channels(); //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); Ptr< Layer > outLayer= net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2])); @@ -408,7 +408,7 @@ Ptr DeepCNN::create(String archFilename,String weightsFilename,Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); #elif defined(HAVE_DNN) - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1)); #else CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); return Ptr(); @@ -419,7 +419,7 @@ Ptr DeepCNN::create(String archFilename,String weightsFilename,Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); break; case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1)); break; case OCR_HOLISTIC_BACKEND_NONE: default: @@ -440,7 +440,7 @@ Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,i return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); #elif defined(HAVE_DNN) - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, 
weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1)); #else CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); return Ptr(); @@ -451,7 +451,7 @@ Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,i return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); break; case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1)); break; case OCR_HOLISTIC_BACKEND_NONE: default: diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 9b2e61ac6f4..87f132850ae 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -376,7 +376,7 @@ Ptr DeepCNNTextDetector::create(String archFilename,String return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); #elif defined(HAVE_DNN) - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); #else CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); return Ptr(); @@ -387,7 +387,7 @@ Ptr DeepCNNTextDetector::create(String archFilename,String break; case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); break; case OCR_HOLISTIC_BACKEND_NONE: @@ -420,7 +420,7 @@ Ptr DeepCNNTextDetector::createTextBoxNet(String archFilena return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); #elif defined(HAVE_DNN) - return Ptr(new 
DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1)); + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); #else CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); return Ptr(); @@ -430,7 +430,7 @@ Ptr DeepCNNTextDetector::createTextBoxNet(String archFilena return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); break; case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1)); + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); break; case OCR_HOLISTIC_BACKEND_NONE: default: From bf630bef4ee22d35eae18eac2487ffe368be71e7 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Mon, 18 Sep 2017 12:16:25 +0200 Subject: [PATCH 24/31] Modified initializers list --- modules/text/src/text_detectorCNN.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 87f132850ae..5267b390fed 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -252,9 +252,9 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ //Size inputGeometry_; int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst //int outputSize_; - int inputHeight_; - int inputWidth_; - int inputChannel_; + //int inputHeight_; + //int inputWidth_; + //int inputChannel_; public: DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn): minibatchSz_(dn.minibatchSz_){ @@ -283,7 +283,7 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ } DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz,int inputHeight=700,int inputWidth =700,int inputChannel =3) - 
:minibatchSz_(maxMinibatchSz),inputHeight_(inputHeight),inputWidth_(inputWidth),inputChannel_(inputChannel) + :minibatchSz_(maxMinibatchSz) { CV_Assert(this->minibatchSz_>0); @@ -304,9 +304,9 @@ class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ exit(-1); } - this->inputGeometry_.height =inputHeight_; - this->inputGeometry_.width = inputWidth_ ;//inputLayer->width(); - this->inputChannelCount_ = inputChannel_ ;//inputLayer->channels(); + this->inputGeometry_.height =inputHeight; + this->inputGeometry_.width = inputWidth ;//inputLayer->width(); + this->inputChannelCount_ = inputChannel ;//inputLayer->channels(); #else CV_Error(Error::StsError,"DNN module not available during compilation!"); From 951e18272dcf13ecede1e5c3b7d9f2b2b0e3c456 Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Thu, 5 Oct 2017 16:42:30 +0300 Subject: [PATCH 25/31] text: cleanup dnn text detection part --- modules/text/CMakeLists.txt | 98 +-- modules/text/FindCaffe.cmake | 14 - modules/text/FindGlog.cmake | 10 - modules/text/FindProtobuf.cmake | 10 - modules/text/FindTesseract.cmake | 22 - modules/text/README.md | 71 -- modules/text/cmake/FindTesseract.cmake | 3 + modules/text/include/opencv2/text.hpp | 2 +- .../text/include/opencv2/text/erfilter.hpp | 1 - modules/text/include/opencv2/text/ocr.hpp | 764 +++--------------- .../include/opencv2/text/textDetector.hpp | 248 +----- modules/text/samples/deeptextdetection.py | 68 +- modules/text/samples/textbox_demo.cpp | 157 ++-- modules/text/src/image_preprocessor.cpp | 387 --------- modules/text/src/ocr_holistic.cpp | 697 ---------------- modules/text/src/precomp.hpp | 2 + modules/text/src/text_detector.cpp | 169 ---- modules/text/src/text_detectorCNN.cpp | 480 ++--------- modules/text/text_config.hpp.in | 3 + 19 files changed, 308 insertions(+), 2898 deletions(-) delete mode 100644 modules/text/FindCaffe.cmake delete mode 100755 modules/text/FindGlog.cmake delete mode 100644 modules/text/FindProtobuf.cmake delete mode 100644 
modules/text/FindTesseract.cmake delete mode 100644 modules/text/src/image_preprocessor.cpp delete mode 100644 modules/text/src/ocr_holistic.cpp delete mode 100644 modules/text/src/text_detector.cpp diff --git a/modules/text/CMakeLists.txt b/modules/text/CMakeLists.txt index b58fd41cf1d..5d0f89f0da0 100644 --- a/modules/text/CMakeLists.txt +++ b/modules/text/CMakeLists.txt @@ -1,84 +1,24 @@ set(the_description "Text Detection and Recognition") - -if(POLICY CMP0023) - message(STATUS "Explicitly setting policy CMP0023 to OLD") - cmake_policy(SET CMP0023 OLD) -endif(POLICY CMP0023) - -# Using cmake scripts and modules -list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - -set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d) - -find_package(Caffe) -if(Caffe_FOUND) - message(STATUS "Caffe: YES") - set(HAVE_CAFFE 1) -else() - message(STATUS "Caffe: NO") -# list(APPEND TEXT_DEPS opencv_dnn) -endif() - -#internal dependencies -find_package(Protobuf) -if(Protobuf_FOUND) - message(STATUS "Protobuf: YES") - set(HAVE_PROTOBUF 1) -else() - message(STATUS "Protobuf: NO") -endif() - -find_package(Glog) -if(Glog_FOUND) - message(STATUS "Glog: YES") - set(HAVE_GLOG 1) -else() - message(STATUS "Glog: NO") -endif() - -ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python) -#ocv_define_module(text ${TEXT_DEPS} WRAP python) - -#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) - -find_package(Tesseract) -if(${Tesseract_FOUND}) - message(STATUS "Tesseract: YES") - include_directories(${Tesseract_INCLUDE_DIR}) - target_link_libraries(opencv_text ${Tesseract_LIBS}) - add_definitions(-DHAVE_TESSERACT) -else() - message(STATUS "Tesseract: NO") +ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java) + +if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT) + 
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) + find_package(Tesseract QUIET) + if(Tesseract_FOUND) + message(STATUS "Tesseract: YES") + set(HAVE_TESSERACT 1) + ocv_include_directories(${Tesseract_INCLUDE_DIR}) + ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES}) + else() + message(STATUS "Tesseract: NO") endif() +endif() +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in + ${CMAKE_BINARY_DIR}/text_config.hpp @ONLY) -if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF) - include_directories(${Caffe_INCLUDE_DIR}) - find_package(HDF5 COMPONENTS HL REQUIRED) - include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) - find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) - include_directories(SYSTEM ${Boost_INCLUDE_DIR}) - include_directories(SYSTEM ${CUDA_INCLUDE_DIR}) - link_directories(SYSTEM ${CUDA_LIBS}) - # include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ ) - #link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64) - list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) - target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES}) - add_definitions(-DHAVE_CAFFE) -endif() #HAVE_CAFFE - -message(STATUS "TEXT CAFFE SEARCH") -if() - message(STATUS "TEXT NO CAFFE CONFLICT") -else() - message(STATUS "TEXT CAFFE CONFLICT") -endif() +ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR}) -if(HAVE_opencv_dnn) - message(STATUS "dnn module found") - add_definitions(-DHAVE_DNN) - set(HAVE_DNN 1) -else() - message(STATUS "dnn module not found") -endif() +ocv_add_testdata(samples/ contrib/text + FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" 
REGEX "scenetext[0-9]+.jpg" +) diff --git a/modules/text/FindCaffe.cmake b/modules/text/FindCaffe.cmake deleted file mode 100644 index 12948f62992..00000000000 --- a/modules/text/FindCaffe.cmake +++ /dev/null @@ -1,14 +0,0 @@ -# Caffe package for CNN Triplet training -unset(Caffe_FOUND) - -find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp - HINTS - /usr/local/include) - -find_library(Caffe_LIBS NAMES caffe - HINTS - /usr/local/lib) - -if(Caffe_LIBS AND Caffe_INCLUDE_DIR) - set(Caffe_FOUND 1) -endif() diff --git a/modules/text/FindGlog.cmake b/modules/text/FindGlog.cmake deleted file mode 100755 index c30e9f4a6ab..00000000000 --- a/modules/text/FindGlog.cmake +++ /dev/null @@ -1,10 +0,0 @@ -#Required for Caffe -unset(Glog_FOUND) - -find_library(Glog_LIBS NAMES glog - HINTS - /usr/local/lib) - -if(Glog_LIBS) - set(Glog_FOUND 1) -endif() diff --git a/modules/text/FindProtobuf.cmake b/modules/text/FindProtobuf.cmake deleted file mode 100644 index 6d0ad56a1f7..00000000000 --- a/modules/text/FindProtobuf.cmake +++ /dev/null @@ -1,10 +0,0 @@ -#Protobuf package required for Caffe -unset(Protobuf_FOUND) - -find_library(Protobuf_LIBS NAMES protobuf - HINTS - /usr/local/lib) - -if(Protobuf_LIBS) - set(Protobuf_FOUND 1) -endif() diff --git a/modules/text/FindTesseract.cmake b/modules/text/FindTesseract.cmake deleted file mode 100644 index 01835e61bc7..00000000000 --- a/modules/text/FindTesseract.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Tesseract OCR -unset(Tesseract_FOUND) - -find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h - HINTS - /usr/include - /usr/local/include) - -find_library(Tesseract_LIBRARY NAMES tesseract - HINTS - /usr/lib - /usr/local/lib) - -find_library(Lept_LIBRARY NAMES lept - HINTS - /usr/lib - /usr/local/lib) - -set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY}) -if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR) - set(Tesseract_FOUND 1) -endif() diff --git 
a/modules/text/README.md b/modules/text/README.md index fd33980e80e..b6955fd9847 100644 --- a/modules/text/README.md +++ b/modules/text/README.md @@ -56,74 +56,3 @@ Intro ----- The text module now have a text detection and recognition using deep CNN. The text detector deep CNN that takes an image which may contain multiple words. This outputs a list of Rects with bounding boxes and probability of text there. The text recognizer provides a probabillity over a given vocabulary for each of these rects. - -Two backends are supported 1) caffe 2) opencv-dnn - - - - -Instalation of Caffe backend ----------------------------- -* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below -The caffe wrapping backend has the requirements caffe does. -* Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises. -The simplest solution is to build caffe without support for OpenCV. -* Only the OS supported by Caffe are supported by the backend. -The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. -Other UNIX systems including OSX should be easy to adapt. 
- -Sample script for building Caffe - -```bash -#!/bin/bash -SRCROOT="${HOME}/caffe_inst/" -mkdir -p "$SRCROOT" -cd "$SRCROOT" -git clone https://github.com/sghoshcvc/TextBoxes.git -cd TextBoxes -cat Makefile.config.example > Makefile.config -echo 'USE_OPENCV := 0' >> Makefile.config -echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config -echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config - - -echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 -+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 -@@ -234,6 +234,7 @@ - - template - friend class Net; -+ virtual ~Callback(){} - }; - const vector& before_forward() const { return before_forward_; } - void add_before_forward(Callback* value) { -">/tmp/cleanup_caffe.diff - -patch < /tmp/cleanup_caffe.diff - - -make -j 6 - -make pycaffe - -make distribute -``` - - -```bash -#!/bin/bash -cd $OPENCV_BUILD_DIR #You must set this -CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04 - -cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="$OPENCV_CONTRIB/modules" ./ - - -``` -where $OPECV_CONTRIB is the root directory containing opencv_contrib module - -Instalation of Caffe backend ----------------------------- - -Use of opencv-dnn does not need any additional library. - -The recent opencv-3.3.0 needs to be build with extra modules to use text module. 
diff --git a/modules/text/cmake/FindTesseract.cmake b/modules/text/cmake/FindTesseract.cmake index 2a5d868f91f..5bdbe243616 100644 --- a/modules/text/cmake/FindTesseract.cmake +++ b/modules/text/cmake/FindTesseract.cmake @@ -5,14 +5,17 @@ endif() if(NOT Tesseract_FOUND) find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h HINTS + /usr/include /usr/local/include) find_library(Tesseract_LIBRARY NAMES tesseract HINTS + /usr/lib /usr/local/lib) find_library(Lept_LIBRARY NAMES lept HINTS + /usr/lib /usr/local/lib) if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY) diff --git a/modules/text/include/opencv2/text.hpp b/modules/text/include/opencv2/text.hpp index 85b8b741982..c06c889838c 100644 --- a/modules/text/include/opencv2/text.hpp +++ b/modules/text/include/opencv2/text.hpp @@ -93,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping). To see the text detector at work, have a look at the textdetection demo: - + @defgroup text_recognize Scene Text Recognition @} diff --git a/modules/text/include/opencv2/text/erfilter.hpp b/modules/text/include/opencv2/text/erfilter.hpp index 2bd1c56a356..c9bac2b3272 100644 --- a/modules/text/include/opencv2/text/erfilter.hpp +++ b/modules/text/include/opencv2/text/erfilter.hpp @@ -65,7 +65,6 @@ component tree of the image. : */ struct CV_EXPORTS ERStat { - public: //! 
Constructor explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0); diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index df9c2b4aa59..22c98448cf1 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -44,12 +44,10 @@ #ifndef __OPENCV_TEXT_OCR_HPP__ #define __OPENCV_TEXT_OCR_HPP__ +#include + #include #include -#include -#include - - namespace cv { @@ -91,100 +89,61 @@ enum ocr_engine_mode }; //base class BaseOCR declares a common API that would be used in a typical text recognition scenario - class CV_EXPORTS_W BaseOCR { - public: +public: virtual ~BaseOCR() {}; - - virtual void run(Mat& image, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, + virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0) = 0; - - virtual void run(Mat& image, Mat& mask, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0) = 0; - - /** @brief Main functionality of the OCR Hierarchy. Subclasses provide - * default parameters for all parameters other than the input image. 
- */ - virtual String run(InputArray image){ - std::string res; - std::vector component_rects; - std::vector component_confidences; - std::vector component_texts; - Mat inputImage=image.getMat(); - this->run(inputImage,res,&component_rects,&component_texts, - &component_confidences,OCR_LEVEL_WORD); - return res; - } - }; -/** @brief OCRTesseract class provides an interface with the tesseract-ocr API - * (v3.02.02) in C++. +/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++. Notice that it is compiled only when tesseract-ocr is correctly installed. @note - - (C++) An example of OCRTesseract recognition combined with scene text - detection can be found at the end_to_end_recognition demo: - - - (C++) Another example of OCRTesseract recognition combined with scene - text detection can be found at the webcam_demo: - + - (C++) An example of OCRTesseract recognition combined with scene text detection can be found + at the end_to_end_recognition demo: + + - (C++) Another example of OCRTesseract recognition combined with scene text detection can be + found at the webcam_demo: + */ class CV_EXPORTS_W OCRTesseract : public BaseOCR { public: /** @brief Recognize text using the tesseract-ocr API. - Takes image on input and returns recognized text in the output_text - parameter. Optionally provides also the Rects for individual text elements - found (e.g. words), and the list of those text elements with their - confidence values. + Takes image on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. @param image Input image CV_8UC1 or CV_8UC3 - @param output_text Output text of the tesseract-ocr. - - @param component_rects If provided the method will output a list of Rects - for the individual text elements found (e.g. words or text lines). 
- - @param component_texts If provided the method will output a list of text - strings for the recognition of individual text elements found (e.g. words or - text lines). - - @param component_confidences If provided the method will output a list of - confidence values for the recognition of individual text elements found - (e.g. words or text lines). - - @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE. - + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words or text lines). + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words or text lines). + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words or text lines). + @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE. 
*/ - using BaseOCR::run; - virtual void run (Mat& image, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, + virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); - virtual void run (Mat& image, Mat& mask, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, - int component_level=0); + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + int component_level=0); // aliases for scripting - CV_WRAP String run (InputArray image, int min_confidence, - int component_level=0); + CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); - CV_WRAP String run(InputArray image, InputArray mask, - int min_confidence, int component_level=0); + CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0; @@ -205,7 +164,6 @@ class CV_EXPORTS_W OCRTesseract : public BaseOCR */ CV_WRAP static Ptr create(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL, int oem=OEM_DEFAULT, int psmode=PSM_AUTO); - }; @@ -225,19 +183,19 @@ enum classifier_type /** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models. 
- - * @note - * - (C++) An example on using OCRHMMDecoder recognition combined with scene - * text detection can be found at the webcam_demo sample: - * +@note + - (C++) An example on using OCRHMMDecoder recognition combined with scene text detection can + be found at the webcam_demo sample: + */ -class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { - public: +class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR +{ +public: /** @brief Callback with the character classifier is made a class. - * This way it hides the feature extractor and the classifier itself, so - * developers can write their own OCR code. + This way it hides the feature extractor and the classifier itself, so developers can write + their own OCR code. The default character classifier and feature extractor can be loaded using the utility function loadOCRHMMClassifierNM and KNN model provided in @@ -246,120 +204,92 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { class CV_EXPORTS_W ClassifierCallback { public: - virtual ~ClassifierCallback() { } - /** @brief The character classifier must return a (ranked list of) - * class(es) id('s) - - * @param image Input image CV_8UC1 or CV_8UC3 with a single letter. - * @param out_class The classifier returns the character class - * categorical label, or list of class labels, to which the input image - * corresponds. + /** @brief The character classifier must return a (ranked list of) class(es) id('s) - * @param out_confidence The classifier returns the probability of the - * input image corresponding to each classes in out_class. + @param image Input image CV_8UC1 or CV_8UC3 with a single letter. + @param out_class The classifier returns the character class categorical label, or list of + class labels, to which the input image corresponds. + @param out_confidence The classifier returns the probability of the input image + corresponding to each classes in out_class. 
*/ - virtual void eval (InputArray image, std::vector& out_class, - std::vector& out_confidence); + virtual void eval( InputArray image, std::vector& out_class, std::vector& out_confidence); }; +public: /** @brief Recognize text using HMM. - * Takes binary image on input and returns recognized text in the output_text - * parameter. Optionally provides also the Rects for individual text elements - * found (e.g. words), and the list of those text elements with their - * confidence values. + Takes binary image on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. - * @param image Input binary image CV_8UC1 with a single text line (or word). + @param image Input binary image CV_8UC1 with a single text line (or word). - * @param output_text Output text. Most likely character sequence found by - * the HMM decoder. + @param output_text Output text. Most likely character sequence found by the HMM decoder. - * @param component_rects If provided the method will output a list of Rects - * for the individual text elements found (e.g. words). + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words). - * @param component_texts If provided the method will output a list of text - * strings for the recognition of individual text elements found (e.g. words). + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words). - * @param component_confidences If provided the method will output a list of - * confidence values for the recognition of individual text elements found - * (e.g. words). + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words). 
- * @param component_level Only OCR_LEVEL_WORD is supported. - */ - using BaseOCR::run; - virtual void run (Mat& image, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, - int component_level=0); + @param component_level Only OCR_LEVEL_WORD is supported. + */ + virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + int component_level=0); /** @brief Recognize text using HMM. - * Takes an image and a mask (where each connected component corresponds to a - * segmented character) on input and returns recognized text in the - * output_text parameter. Optionally provides also the Rects for individual - * text elements found (e.g. words), and the list of those text elements with - * their confidence values. - - * @param image Input image CV_8UC1 or CV_8UC3 with a single text line - * (or word). + Takes an image and a mask (where each connected component corresponds to a segmented character) + on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. - * @param mask Input binary image CV_8UC1 same size as input image. Each - * connected component in mask corresponds to a segmented character in the - * input image. + @param image Input image CV_8UC1 or CV_8UC3 with a single text line (or word). + @param mask Input binary image CV_8UC1 same size as input image. Each connected component in mask corresponds to a segmented character in the input image. - * @param output_text Output text. Most likely character sequence found by - * the HMM decoder. + @param output_text Output text. Most likely character sequence found by the HMM decoder. 
- * @param component_rects If provided the method will output a list of Rects - * for the individual text elements found (e.g. words). + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words). - * @param component_texts If provided the method will output a list of text - * strings for the recognition of individual text elements found (e.g. words). + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words). - * @param component_confidences If provided the method will output a list of - * confidence values for the recognition of individual text elements found - * (e.g. words). + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words). - * @param component_level Only OCR_LEVEL_WORD is supported. - */ - virtual void run(Mat& image, Mat& mask, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, + @param component_level Only OCR_LEVEL_WORD is supported. + */ + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); // aliases for scripting - CV_WRAP String run(InputArray image, - int min_confidence, - int component_level=0); + CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); - CV_WRAP String run(InputArray image, - InputArray mask, - int min_confidence, - int component_level=0); + CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); - /** @brief Creates an instance of the OCRHMMDecoder class. Initializes - * HMMDecoder. + /** @brief Creates an instance of the OCRHMMDecoder class. 
Initializes HMMDecoder. - * @param classifier The character classifier with built in feature - * extractor. + @param classifier The character classifier with built in feature extractor. - * @param vocabulary The language vocabulary (chars when ascii english text) - * . vocabulary.size() must be equal to the number of classes of the - * classifier. + @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size() + must be equal to the number of classes of the classifier. - * @param transition_probabilities_table Table with transition probabilities - * between character pairs. cols == rows == vocabulary.size(). + @param transition_probabilities_table Table with transition probabilities between character + pairs. cols == rows == vocabulary.size(). - * @param emission_probabilities_table Table with observation emission - * probabilities. cols == rows == vocabulary.size(). + @param emission_probabilities_table Table with observation emission probabilities. cols == + rows == vocabulary.size(). - * @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available - * for the moment (). + @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment + (). */ - static Ptr create(const Ptr classifier,// The character classifier with built in feature extractor const std::string& vocabulary, // The language vocabulary (chars when ASCII English text) // size() must be equal to the number of classes @@ -402,11 +332,9 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { decoder_mode mode; }; -/** @brief Allow to implicitly load the default character classifier when - * creating an OCRHMMDecoder object. - - @param filename The XML or YAML file with the classifier model (e.g.OCRHMM_knn_model_data.xml) +/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. +@param filename The XML or YAML file with the classifier model (e.g. 
OCRHMM_knn_model_data.xml) The KNN default classifier is based in the scene text recognition method proposed by Lukás Neumann & Jiri Matas in [Neumann11b]. Basically, the region (contour) in the input image is normalized to a @@ -416,16 +344,11 @@ using a KNN model trained with synthetic data of rendered characters with differ types. @deprecated loadOCRHMMClassifier instead - */ -CV_EXPORTS_W Ptr loadOCRHMMClassifierNM ( - const String& filename); -/** @brief Allow to implicitly load the default character classifier when - * creating an OCRHMMDecoder object. - - @param filename The XML or YAML file with the classifier model (e.g.OCRBeamSearch_CNN_model_data.xml.gz) +CV_EXPORTS_W Ptr loadOCRHMMClassifierNM(const String& filename); +/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. @param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz) @@ -435,10 +358,8 @@ a linear classifier. It is applied to the input image in a sliding window fashio at each window location. @deprecated use loadOCRHMMClassifier instead - */ -CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN ( - const String& filename); +CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN(const String& filename); /** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. @@ -450,64 +371,49 @@ CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN ( CV_EXPORTS_W Ptr loadOCRHMMClassifier(const String& filename, int classifier); //! @} - /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon). * * @param vocabulary The language vocabulary (chars when ASCII English text). * * @param lexicon The list of words that are expected to be found in a particular image. - - * @param transition_probabilities_table Output table with transition - * probabilities between character pairs. cols == rows == vocabulary.size(). 
- - * The function calculate frequency statistics of character pairs from the given - * lexicon and fills the output transition_probabilities_table with them. The - * transition_probabilities_table can be used as input in the - * OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods. + * + * @param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size(). + * + * The function calculate frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods. * @note - * - (C++) An alternative would be to load the default generic language - * transition table provided in the text module samples folder (created - * from ispell 42869 english words list) : - * + * - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) : + * **/ -CV_EXPORTS void createOCRHMMTransitionsTable ( - std::string& vocabulary, std::vector& lexicon, - OutputArray transition_probabilities_table); +CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector& lexicon, OutputArray transition_probabilities_table); + +CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector& lexicon); -CV_EXPORTS_W Mat createOCRHMMTransitionsTable ( - const String& vocabulary, std::vector& lexicon); /* OCR BeamSearch Decoder */ -/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam - * Search algorithm. +/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm. 
@note - - (C++) An example on using OCRBeamSearchDecoder recognition combined with - scene text detection can be found at the demo sample: - + - (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can + be found at the demo sample: + */ - - -/* Forward declaration of class that can be used to generate an OCRBeamSearchDecoder::ClassifierCallbac */ -class TextImageClassifier; - -class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ - - public: +class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR +{ +public: /** @brief Callback with the character classifier is made a class. - * This way it hides the feature extractor and the classifier itself, so - * developers can write their own OCR code. + This way it hides the feature extractor and the classifier itself, so developers can write + their own OCR code. - * The default character classifier and feature extractor can be loaded - * using the utility funtion loadOCRBeamSearchClassifierCNN with all its - * parameters provided in - * . + The default character classifier and feature extractor can be loaded using the utility funtion + loadOCRBeamSearchClassifierCNN with all its parameters provided in + . */ - class CV_EXPORTS_W ClassifierCallback{ - public: + class CV_EXPORTS_W ClassifierCallback + { + public: virtual ~ClassifierCallback() { } /** @brief The character classifier must return a (ranked list of) class(es) id('s) @@ -519,8 +425,8 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ */ virtual void eval( InputArray image, std::vector< std::vector >& recognition_probabilities, std::vector& oversegmentation ); - virtual int getWindowSize() {return 0;} - virtual int getStepSize() {return 0;} + int getWindowSize() {return 0;} + int getStepSize() {return 0;} }; public: @@ -545,7 +451,6 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ @param component_level Only OCR_LEVEL_WORD is supported. 
*/ - using BaseOCR::run; virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); @@ -577,7 +482,6 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ @param beam_size Size of the beam in Beam Search algorithm. */ - static Ptr create(const Ptr classifier,// The character classifier with built in feature extractor const std::string& vocabulary, // The language vocabulary (chars when ASCII English text) // size() must be equal to the number of classes @@ -598,29 +502,10 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int beam_size = 500); // Size of the beam in Beam Search algorithm - - - - /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder from the specified path. @overload - @param filename path to a character classifier file - - @param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size() - must be equal to the number of classes of the classifier.. - - @param transition_probabilities_table Table with transition probabilities between character - pairs. cols == rows == vocabulary.size(). - - @param emission_probabilities_table Table with observation emission probabilities. cols == - rows == vocabulary.size(). 
- - @param mode HMM Decoding algorithm (only Viterbi for the moment) - - @param beam_size Size of the beam in Beam Search algorithm - */ CV_WRAP static Ptr create(const String& filename, // The character classifier file const String& vocabulary, // The language vocabulary (chars when ASCII English text) @@ -631,7 +516,6 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ // cols == rows == vocabulary.size() int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int beam_size = 500); - protected: Ptr classifier; @@ -656,402 +540,6 @@ CV_EXPORTS_W Ptr loadOCRBeamSearchClas //! @} - -//Classifiers should provide diferent backends - -enum{ - OCR_HOLISTIC_BACKEND_NONE, //No back end - OCR_HOLISTIC_BACKEND_DNN, // dnn backend opencv_dnn - OCR_HOLISTIC_BACKEND_CAFFE, // caffe based backend - OCR_HOLISTIC_BACKEND_DEFAULT // to store default value based on environment -}; - -class TextImageClassifier; - -/** - * @brief The ImagePreprocessor class - */ -class CV_EXPORTS_W ImagePreprocessor{ -protected: - virtual void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels)=0; - virtual void set_mean_(Mat){} - -public: - virtual ~ImagePreprocessor(){} - - /** @brief this method in provides public acces to the preprocessing with respect to a specific - * classifier - * - * This method's main use would be to use the preprocessor without feeding it to a classifier. - * Determining the exact behavior of a preprocessor is the main motivation for this. 
- * - * @param input an image without any constraints - * - * @param output in most cases an image of fixed depth size and whitened - * - * @param sz the size to which the image would be resize if the preprocessor resizes inputs - * - * @param outputChannels the number of channels for the output image - */ - CV_WRAP void preprocess(InputArray input,OutputArray output,Size sz,int outputChannels); - - /** @brief this method in provides public acces to set the mean of the input images - * mean can be a mat either of same size of the image or one value per color channel - * A preprocessor can be created without the mean( the pre processor will calculate mean for every image - * in that case - * - - * @param mean which will be subtracted from the images - * - */ - - CV_WRAP void set_mean(Mat mean); - - /** @brief Creates a functor that only resizes and changes the channels of the input - * without further processing. - * - * @return shared pointer to the generated preprocessor - */ - CV_WRAP static Ptr createResizer(); - - /** @brief - * - * @param sigma - * - * @return shared pointer to generated preprocessor - */ - CV_WRAP static Ptr createImageStandarizer(double sigma); - - /** @brief - * - * @return shared pointer to generated preprocessor - */ - CV_WRAP static Ptr createImageMeanSubtractor(InputArray meanImg); - /** @brief - * create a functor with the parameters, parameters can be changes by corresponding set functions - * @return shared pointer to generated preprocessor - */ - - CV_WRAP static PtrcreateImageCustomPreprocessor(double rawval=1.0,String channel_order="BGR"); - - friend class TextImageClassifier; - -}; - -/** @brief Abstract class that implements the classifcation of text images. - * - * The interface is generic enough to describe any image classifier. And allows - * to take advantage of compouting in batches. While word classifiers are the default - * networks, any image classifers should work. 
- * - */ -class CV_EXPORTS_W TextImageClassifier -{ -protected: - Size inputGeometry_; - Size outputGeometry_; - int channelCount_; - Ptr preprocessor_; - /** @brief all image preprocessing is handled here including whitening etc. - * - * @param input the image to be preprocessed for the classifier. If the depth - * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1] - * - * @param output reference to the image to be fed to the classifier, the preprocessor will - * resize the image to the apropriate size and convert it to the apropriate depth\ - * - * The method preprocess should never be used externally, it is up to classify and classifyBatch - * methods to employ it. - */ - virtual void preprocess(const Mat& input,Mat& output); -public: - virtual ~TextImageClassifier() {} - - /** @brief - */ - CV_WRAP virtual void setPreprocessor(Ptr ptr); - - /** @brief - */ - CV_WRAP Ptr getPreprocessor(); - - /** @brief produces a class confidence row-vector given an image - */ - CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0; - - /** @brief produces a matrix containing class confidence row-vectors given an collection of images - */ - CV_WRAP virtual void classifyBatch(InputArrayOfArrays image, OutputArray classProbabilities) = 0; - - /** @brief simple getter method returning the number of channels each input sample has - */ - CV_WRAP virtual int getInputChannelCount(){return this->channelCount_;} - - /** @brief simple getter method returning the size of the input sample - */ - CV_WRAP virtual Size getInputSize(){return this->inputGeometry_;} - - /** @brief simple getter method returning the size of the oputput row-vector - */ - CV_WRAP virtual int getOutputSize()=0; - /** @brief simple getter method returning the shape of the oputput from caffe - */ - CV_WRAP virtual Size getOutputGeometry()=0; - - /** @brief simple getter method returning the size of the minibatches for this classifier. 
- * If not applicabe this method should return 1 - */ - CV_WRAP virtual int getMinibatchSize()=0; - - friend class ImagePreprocessor; -}; - - - -class CV_EXPORTS_W DeepCNN:public TextImageClassifier -{ - /** @brief Class that uses a pretrained caffe model for word classification. - * - * This network is described in detail in: - * Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015 - * http://arxiv.org/abs/1412.1842 - */ -public: - virtual ~DeepCNN() {}; - - /** @brief Constructs a DeepCNN object from a caffe pretrained model - * - * @param archFilename is the path to the prototxt file containing the deployment model architecture description. - * - * @param weightsFilename is the path to the pretrained weights of the model in binary fdorm. This file can be - * very large, up to 2GB. - * - * @param preprocessor is a pointer to the instance of a ImagePreprocessor implementing the preprocess_ protecteed method; - * - * @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter - * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU. - * - * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is - * the only option - */ - CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); - - /** @brief Constructs a DeepCNN intended to be used for word spotting. - * - * This method loads a pretrained classifier and couples him with a preprocessor that standarises pixels with a - * deviation of 113. The architecture file can be downloaded from: - * - * While the weights can be downloaded from: - * - * The words assigned to the network outputs are available at: - * - * - * @param archFilename is the path to the prototxt file containing the deployment model architecture description. 
- * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt". - * - * @param weightsFilename is the path to the pretrained weights of the model. When employing - * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. This file can be very large, the - * pretrained DictNet uses 2GB. - * - * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is - * the only option - */ - CV_WRAP static Ptr createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); - -}; - -namespace cnn_config{ - -/** @brief runtime backend information - * - * this function finds the status of backends compiled with this module - * - * @return a list of backends (caffe,opencv-dnn etc.) - * */ -CV_EXPORTS_W std::vector getAvailableBackends(); - -namespace caffe_backend{ - -/** @brief Prompts Caffe on the computation device beeing used - * - * Caffe can only be controlled globally on whether the GPU or the CPU is used has a - * global behavior. This function queries the current state of caffe. - * If the module is built without caffe, this method throws an exception. - * - * @return true if caffe is computing on the GPU, false if caffe is computing on the CPU - */ -CV_EXPORTS_W bool getCaffeGpuMode(); - -/** @brief Sets the computation device beeing used by Caffe - * - * Caffe can only be controlled globally on whether the GPU or the CPU is used has a - * global behavior. This function queries the current state of caffe. - * If the module is built without caffe, this method throws an exception. - * - * @param useGpu set to true for caffe to be computing on the GPU, false if caffe is - * computing on the CPU - */ -CV_EXPORTS_W void setCaffeGpuMode(bool useGpu); - -/** @brief Provides runtime information on whether Caffe support was compiled in. - * - * The text module API is the same regardless of whether CAffe was available or not - * During compilation. 
When methods that require Caffe are invocked while Caffe support - * is not compiled in, exceptions are thrown. This method allows to test whether the - * text module was built with caffe during runtime. - * - * @return true if Caffe support for the the text module was provided during compilation, - * false if Caffe was unavailable. - */ -CV_EXPORTS_W bool getCaffeAvailable(); - -}//caffe -namespace dnn_backend { - -/** @brief Provides runtime information on whether DNN module was compiled in. - * - * The text module API is the same regardless of whether DNN module was available or not - * During compilation. When methods that require backend are invocked while no backend support - * is compiled, exceptions are thrown. This method allows to test whether the - * text module was built with dnn_backend during runtime. - * - * @return true if opencv_dnn support for the the text module was provided during compilation, - * false if opencv_dnn was unavailable. - */ -CV_EXPORTS_W bool getDNNAvailable(); - -}//dnn_backend -}//cnn_config - -/** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting. - * Given a predefined vocabulary , a TextImageClassifier is employed to select the most probable - * word given an input image. - * - * This class implements the logic of providing transcriptions given a vocabulary and and an image - * classifer. The classifier has to be any TextImageClassifier but the classifier for which this - * class was built is the DictNet. In order to load it the following files should be downloaded: - - * - * - * - */ -class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR -{ -public: - virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=OCR_LEVEL_WORD)=0; - - /** @brief Recognize text using a segmentation based word-spotting/classifier cnn. 
- - Takes image on input and returns recognized text in the output_text parameter. Optionally - provides also the Rects for individual text elements found (e.g. words), and the list of those - text elements with their confidence values. - - @param image Input image CV_8UC1 or CV_8UC3 - - @param mask is totally ignored and is only available for compatibillity reasons - - @param output_text Output text of the the word spoting, always one that exists in the dictionary. - - @param component_rects Not applicable for word spotting can be be NULL if not, a single elemnt will - be put in the vector. - - @param component_texts Not applicable for word spotting can be be NULL if not, a single elemnt will - be put in the vector. - - @param component_confidences Not applicable for word spotting can be be NULL if not, a single elemnt will - be put in the vector. - - @param component_level must be OCR_LEVEL_WORD. - */ - - virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=OCR_LEVEL_WORD)=0; - - - /** - @brief Method that provides a quick and simple interface to a single word image classifcation - - @param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size assumed to contain a single word - - @param transcription an opencv string that will store the detected word transcription - - @param confidence a double that will be updated with the confidence the classifier has for the selected word - */ - CV_WRAP virtual void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)=0; - - /** - @brief Method that provides a quick and simple interface to a multiple word image classifcation taking advantage - the classifiers parallel capabilities. 
- - @param inputImageList an list of images expected to be a CV_U8C1 or CV_U8C3 each image can be of any size and is assumed - to contain a single word. - - @param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each - input image - - @param confidences a vector of double that will be updated with the confidence the classifier has for each of the - selected words. - */ - CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector& transcriptions,CV_OUT std::vector& confidences)=0; - - - /** - @brief simple getter for the vocabulary employed - */ - CV_WRAP virtual const std::vector& getVocabulary()=0; - - /** @brief simple getter for the preprocessing functor - */ - CV_WRAP virtual Ptr getClassifier()=0; - - /** @brief Creates an instance of the OCRHolisticWordRecognizer class. - - @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance - - @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line - in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize - of the classifier. - */ - CV_WRAP static Ptr create(Ptr classifierPtr,String vocabularyFilename); - - - /** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier. - - @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. - - @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. - - @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line - in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize - of the classifier. 
- */ - CV_WRAP static Ptr create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename); - - /** @brief - * - * @param classifierPtr - * - * @param vocabulary - */ - CV_WRAP static Ptr create(Ptr classifierPtr,const std::vector& vocabulary); - - /** @brief - * - * @param modelArchFilename - * - * @param modelWeightsFilename - * - * @param vocabulary - */ - CV_WRAP static Ptr create (String modelArchFilename, String modelWeightsFilename, const std::vector& vocabulary); -}; - - -}//namespace text -}//namespace cv - - +} +} #endif // _OPENCV_TEXT_OCR_HPP_ diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index eda74801449..0e51df39f4c 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -1,56 +1,12 @@ -/*M////////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. #ifndef __OPENCV_TEXT_TEXTDETECTOR_HPP__ #define __OPENCV_TEXT_TEXTDETECTOR_HPP__ -#include -#include -#include -#include #include"ocr.hpp" - namespace cv { namespace text @@ -59,208 +15,44 @@ namespace text //! @addtogroup text_detect //! 
@{ - - -//base class BaseDetector declares a common API that would be used in a typical text -//detection scenario -class CV_EXPORTS_W BaseDetector -{ -public: - virtual ~BaseDetector() {}; - - virtual void run(Mat& image, - std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=0) = 0; - - virtual void run(Mat& image, Mat& mask, - std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=0) = 0; - -}; -/** A virtual class for different models of text detection (including CNN based deep models) +/** @brief An abstract class providing interface for text detection algorithms */ - -class CV_EXPORTS_W TextRegionDetector +class CV_EXPORTS_W TextDetector { -protected: - /** Stores input and output size - */ - //netGeometry inputGeometry_; - //netGeometry outputGeometry_; - Size inputGeometry_; - Size outputGeometry_; - int inputChannelCount_; - int outputChannelCount_; - public: - virtual ~TextRegionDetector() {} - - /** @brief produces a list of Bounding boxes and an estimate of text-ness confidence of Bounding Boxes - */ - CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0; - - - /** @brief simple getter method returning the size (height, width) of the input sample - */ - CV_WRAP virtual Size getInputGeometry(){return this->inputGeometry_;} - - /** @brief simple getter method returning the shape of the oputput - * Any text detector should output a number of text regions alongwith a score of text-ness - * From the shape it can be inferred the number of text regions and number of returned value - * for each region - */ - CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;} - - - -}; - -/** Generic structure of Deep CNN based Text Detectors - * */ -class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector -{ - /** @brief Class that uses a pretrained caffe model for text detection. 
- * Any text detection should - * This network is described in detail in: - * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network - * https://arxiv.org/abs/1611.06779 - */ -protected: - /** all deep CNN based text detectors have a preprocessor (normally) - */ - Ptr preprocessor_; - /** @brief all image preprocessing is handled here including whitening etc. - * - * @param input the image to be preprocessed for the classifier. If the depth - * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1] - * - * @param output reference to the image to be fed to the classifier, the preprocessor will - * resize the image to the apropriate size and convert it to the apropriate depth\ - * - * The method preprocess should never be used externally, it is up to classify and classifyBatch - * methods to employ it. - */ - virtual void preprocess(const Mat& input,Mat& output); -public: - virtual ~DeepCNNTextDetector() {}; - - /** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model - * - * @param archFilename is the path to the prototxt file containing the deployment model architecture description. - * - * @param weightsFilename is the path to the pretrained weights of the model in binary fdorm. - * - * @param preprocessor is a pointer to the instance of a ImagePreprocessor implementing the preprocess_ protecteed method; - * - * @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter - * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU. - * - * @param backEnd integer parameter selecting the coputation framework. 
For now OCR_HOLISTIC_BACKEND_CAFFE is - * the only option - */ - CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); - - /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection. - * - * This method loads a pretrained classifier and couples with a preprocessor that preprocess the image with mean subtraction of () - * The architecture and models weights can be downloaded from: - * https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB) - - * @param archFilename is the path to the prototxt file containing the deployment model architecture description. - * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt". - * - * @param weightsFilename is the path to the pretrained weights of the model. When employing - * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. - * - * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is - * the only option - */ - CV_WRAP static Ptr createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); - friend class ImagePreprocessor; + /** + @brief Method that provides a quick and simple interface to detect text inside an image + @param inputImage an image to process + @param Bbox a vector of Rect that will store the detected word bounding box + @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box + */ + virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; + virtual ~TextDetector() {} }; -/** @brief textDetector class provides the functionallity of text bounding box detection. - * A TextRegionDetector is employed to find bounding boxes of text - * words given an input image. 
- * - * This class implements the logic of providing text bounding boxes in a vector of rects given an TextRegionDetector - * The TextRegionDetector can be any text detector - * +/** @brief TextDetectorCNN class provides the functionallity of text bounding box detection. + * A TextDetectorCNN is employed to find bounding boxes of text words given an input image. */ - -class CV_EXPORTS_W textDetector : public BaseDetector +class CV_EXPORTS_W TextDetectorCNN : public TextDetector { public: - virtual void run(Mat& image, std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=OCR_LEVEL_WORD)=0; - - /** @brief detect text with a cnn, input is one image with (multiple) ocuurance of text. - - Takes image on input and returns recognized text in the output_text parameter. Optionally - provides also the Rects for individual text elements found (e.g. words), and the list of those - text elements with their confidence values. - - @param image Input image CV_8UC1 or CV_8UC3 - - @param mask is totally ignored and is only available for compatibillity reasons - - - @param component_rects a vector of Rects, each rect is one text bounding box. - - - - @param component_confidences A vector of float returns confidence of text bounding boxes - - @param component_level must be OCR_LEVEL_WORD. 
- */ - - virtual void run(Mat& image, Mat& mask, std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=OCR_LEVEL_WORD)=0; - - /** - @brief Method that provides a quick and simple interface to detect text inside an image + @overload @param inputImage an image expected to be a CV_U8C3 of any size - @param Bbox a vector of Rect that will store the detected word bounding box - @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box */ - CV_WRAP virtual void textDetectInImage(InputArray inputImage,CV_OUT std::vector& Bbox,CV_OUT std::vector& confidence)=0; - - - - - /** @brief simple getter for the preprocessing functor - */ - CV_WRAP virtual Ptr getClassifier()=0; - - /** @brief Creates an instance of the textDetector class. - - @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance - - - */ - CV_WRAP static Ptr create(Ptr classifierPtr); - + CV_WRAP virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier. @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. - @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. - - + @param detectMultiscale if true, multiple scales of the input image will be used as network input */ - CV_WRAP static Ptr create(String modelArchFilename, String modelWeightsFilename); - - + CV_WRAP static Ptr create(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale = false); }; //! 
@} diff --git a/modules/text/samples/deeptextdetection.py b/modules/text/samples/deeptextdetection.py index 2e8395b60f1..09dcb24927d 100644 --- a/modules/text/samples/deeptextdetection.py +++ b/modules/text/samples/deeptextdetection.py @@ -1,57 +1,37 @@ # -*- coding: utf-8 -*- -""" -Created on Wed Jul 19 17:54:00 2017 - -@author: sgnosh -""" - #!/usr/bin/python - import sys import os - import cv2 import numpy as np -print('\nDeeptextdetection.py') -print(' A demo script of text box alogorithm of the paper:') -print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') - - -if (len(sys.argv) < 2): - print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') - quit() -#if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable(): -# print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n" -# -# quit() -# check model and architecture file existance -if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): - print " Model files not found in current directory. 
Aborting" - print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" - quit() -cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True); -pathname = os.path.dirname(sys.argv[0]) +def main(): + print('\nDeeptextdetection.py') + print(' A demo script of text box alogorithm of the paper:') + print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') + if (len(sys.argv) < 2): + print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') + quit() -img = cv2.imread(str(sys.argv[1])) -textSpotter=cv2.text.textDetector_create( - "textbox_deploy.prototxt","textbox.caffemodel") -rects,outProbs = textSpotter.textDetectInImage(img); -# for visualization -vis = img.copy() -# Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown -thres = 0.6 + if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): + print " Model files not found in current directory. 
Aborting" + print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" + quit() + img = cv2.imread(str(sys.argv[1])) + textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel") + rects, outProbs = textSpotter.textDetectInImage(img); + vis = img.copy() + thres = 0.6 - #Visualization -for r in range(0,np.shape(rects)[0]): - if outProbs[r] >thres: - rect = rects[r] - cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2) - # cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1) + for r in range(np.shape(rects)[0]): + if outProbs[r] > thres: + rect = rects[r] + cv2.rectangle(vis, (rect[0],rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 0), 2) + cv2.imshow("Text detection result", vis) + cv2.waitKey() -#Visualization -cv2.imshow("Text detection result", vis) -cv2.waitKey(0) \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index b76658e1b7a..9975c394730 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -1,151 +1,86 @@ -/* - * dictnet_demo.cpp - * - * Demonstrates simple use of the holistic word classifier in C++ - * - * Created on: June 26, 2016 - * Author: Anguelos Nicolaou - */ - -#include "opencv2/text.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/imgproc.hpp" +#include +#include +#include #include -#include #include -#include #include -void textbox_draw(cv::Mat &src, std::vector &groups,std::vector &probs,std::vector wordList,float thres); -inline std::string getHelpStr(std::string progFname){ - std::stringstream out; - out << " Demo of text detection CNN for text detection." 
<< std::endl; - out << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"< " << std::endl; - out << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"< " << std::endl + << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"< &groups,std::vector &probs,std::vector wordList,float thres=0.6) + +void textbox_draw(Mat src, std::vector& groups, std::vector& probs, float thres) { - for (int i=0;i<(int)groups.size(); i++) + for (size_t i = 0; i < groups.size(); i++) { - if(probs[i]>thres) + if(probs[i] > thres) { if (src.type() == CV_8UC3) { - cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 ); - cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 )); + rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA); + String label = format("%.2f", probs[i]); + std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n"; + putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA); } else - rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 ); + rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); } } } +} -int main(int argc, const char * argv[]){ - if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){ - std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"; - //exit(1); - } - std::vector backends=cv::text::cnn_config::getAvailableBackends(); - std::cout << "The Following backends are available" << "\n"; - for (int i=0;i textSpotter=cv::text::textDetector::create( - "textbox_deploy.prototxt","textbox.caffemodel"); + std::cout << "Starting Text Box Demo" << std::endl; + Ptr textSpotter = + text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false); - //cv::Ptr wordSpotter= - // cv::text::textDetector::create(cnn); - std::cout<<"Created Text Spotter with text 
Boxes"; - - std::vector bbox; + std::vector bbox; std::vector outProbabillities; - textSpotter->textDetectInImage(image,bbox,outProbabillities); - // textbox_draw(image, bbox,outProbabillities); - float thres =0.6f; - std::vector imageList; - for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){ - if(outProbabillities[imageIdx]>thres){ - imageList.push_back(image(bbox.at(imageIdx))); - } - - } - // call dict net here for all detected parts - cv::Ptr cnn=cv::text::DeepCNN::createDictNet( - "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN); - - cv::Ptr wordSpotter= - cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt"); - - std::vector wordList; - std::vector wordProbabillities; - wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities); - // write the output in file - std::ofstream out; - out.open(argv[1]); - - - for (int i=0;i<(int)wordList.size(); i++) - { - cv::Point tl_ = bbox.at(i).tl(); - cv::Point br_ = bbox.at(i).br(); - - out<textDetectInImage(image, bbox, outProbabillities); + textbox_draw(image, bbox, outProbabillities, 0.5f); - cv::imshow("TextBox Demo",image); + imshow("TextBox Demo",image); std::cout << "Done!" << std::endl << std::endl; std::cout << "Press any key to exit." 
<< std::endl << std::endl; - if ((cv::waitKey()&0xff) == ' ') - return 0; + waitKey(); + return 0; } diff --git a/modules/text/src/image_preprocessor.cpp b/modules/text/src/image_preprocessor.cpp deleted file mode 100644 index 3a65a210863..00000000000 --- a/modules/text/src/image_preprocessor.cpp +++ /dev/null @@ -1,387 +0,0 @@ -#include "precomp.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/core.hpp" - - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace cv { namespace text { -//************************************************************************************ -//****************** ImagePreprocessor ******************************************* -//************************************************************************************ - -void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){ - Mat inpImg=input.getMat(); - Mat outImg; - this->preprocess_(inpImg,outImg,sz,outputChannels); - outImg.copyTo(output); -} -void ImagePreprocessor::set_mean(Mat mean){ - - - this->set_mean_(mean); - -} - - - -class ResizerPreprocessor: public ImagePreprocessor{ -protected: - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1){ - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired 
[0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U){ - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - } - //void set_mean_(Mat m){} -public: - ResizerPreprocessor(){} - ~ResizerPreprocessor(){} -}; - -class StandarizerPreprocessor: public ImagePreprocessor{ -protected: - double sigma_; - //void set_mean_(Mat M){} - - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - 
input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - - Scalar mean,dev; - meanStdDev(output,mean,dev); - subtract(output,mean[0],output); - divide(output,(dev[0]/sigma_),output); - } -public: - StandarizerPreprocessor(double sigma):sigma_(sigma){} - ~StandarizerPreprocessor(){} - -}; - -class customPreprocessor:public ImagePreprocessor{ -protected: - - double rawval_; - Mat mean_; - String channel_order_; - - void set_mean_(Mat imMean_){ - - imMean_.copyTo(this->mean_); - - - } - - void set_raw_scale(int rawval){ - rawval_ = rawval; - - } - void set_channels(String channel_order){ - channel_order_=channel_order; - } - - - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - tmpInput.convertTo(output,CV_32FC3,1/255.0); - else - tmpInput.convertTo(output,CV_32FC1); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - tmpInput.convertTo(output, CV_32FC1); - else - tmpInput.convertTo(output, CV_32FC1,rawval_); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - tmpInput.convertTo(output,CV_32FC3,1/255.0); - else - tmpInput.convertTo(output,CV_32FC1); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - tmpInput.convertTo(output, CV_32FC1); - else - tmpInput.convertTo(output, CV_32FC1,rawval_); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - input.convertTo(output,CV_32FC1,1/255.0); - else - 
input.convertTo(output,CV_32FC1); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - input.convertTo(output, CV_32FC1); - else - input.convertTo(output, CV_32FC1,rawval_); - } - }else - { - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - input.convertTo(output,CV_32FC3,1/255.0); - else - input.convertTo(output,CV_32FC3); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - input.convertTo(output, CV_32FC3); - else - input.convertTo(output, CV_32FC3,rawval_); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - - if (!this->mean_.empty()){ - - Scalar mean_s(this->mean_.at(0,0),this->mean_.at(0,1),this->mean_.at(0,2)); - subtract(output,mean_s,output); - } - else{ - Scalar mean_s; - mean_s = mean(output); - subtract(output,mean_s,output); - } - - } - -public: - customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){} - ~customPreprocessor(){} - -}; - -class MeanSubtractorPreprocessor: public ImagePreprocessor{ -protected: - Mat mean_; - //void set_mean_(Mat m){} - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height); - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired 
[0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - subtract(output,this->mean_,output); - } -public: - MeanSubtractorPreprocessor(Mat mean) - { - mean.copyTo(this->mean_); - } - - ~MeanSubtractorPreprocessor(){} -}; - - - -Ptr ImagePreprocessor::createResizer() -{ - return Ptr(new ResizerPreprocessor); -} - -Ptr ImagePreprocessor::createImageStandarizer(double sigma) -{ - return Ptr(new StandarizerPreprocessor(sigma)); -} -Ptr ImagePreprocessor::createImageCustomPreprocessor(double rawval,String channel_order) -{ - - return Ptr(new customPreprocessor(rawval,channel_order)); -} - -Ptr ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg) -{ - Mat tmp=meanImg.getMat(); - return Ptr(new MeanSubtractorPreprocessor(tmp)); -} -} -} diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp deleted file mode 100644 index 035f104f28a..00000000000 --- a/modules/text/src/ocr_holistic.cpp +++ /dev/null @@ -1,697 +0,0 @@ -#include "precomp.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/core.hpp" - - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#ifdef HAVE_CAFFE -#include "caffe/caffe.hpp" -#endif - -#ifdef HAVE_DNN -#include "opencv2/dnn.hpp" -#endif - -using namespace cv; -using namespace cv::dnn; -using namespace std; -namespace cv { namespace text { - -//Maybe OpenCV has a routine better suited -inline bool fileExists (String 
filename) { - std::ifstream f(filename.c_str()); - return f.good(); -} - - - -//************************************************************************************ -//****************** TextImageClassifier ***************************************** -//************************************************************************************ - -void TextImageClassifier::preprocess(const Mat& input,Mat& output) -{ - this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_); -} - -void TextImageClassifier::setPreprocessor(Ptr ptr) -{ - CV_Assert(!ptr.empty()); - preprocessor_=ptr; -} - -Ptr TextImageClassifier::getPreprocessor() -{ - return preprocessor_; -} - - -class DeepCNNCaffeImpl: public DeepCNN{ -protected: - void classifyMiniBatch(std::vector inputImageList, Mat outputMat) - { - //Classifies a list of images containing at most minibatchSz_ images - CV_Assert(int(inputImageList.size())<=this->minibatchSz_); - CV_Assert(outputMat.isContinuous()); - - -#ifdef HAVE_CAFFE - net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); - net_->Reshape(); - float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); - float* inputData=inputBuffer; - - for(size_t imgNum=0;imgNum input_channels; - Mat preprocessed; - // if the image have multiple color channels the input layer should be populated accordingly - for (int channel=0;channel < this->channelCount_;channel++){ - - cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); - input_channels.push_back(netInputWraped); - //input_data += width * height; - inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); - - } - this->preprocess(inputImageList[imgNum],preprocessed); - split(preprocessed, input_channels); - - - } - this->net_->ForwardPrefilled(); - const float* outputNetData=net_->output_blobs()[0]->cpu_data(); - this->outputGeometry_ = 
Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); - int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; - - - //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width); - float*outputMatData=(float*)(outputMat.data); - memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size()); - -#endif - } - -#ifdef HAVE_CAFFE - Ptr > net_; -#endif - //Size inputGeometry_;//=Size(100,32); - int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst - int outputSize_; - //Size outputGeometry_; -public: - DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn): - minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ - channelCount_=dn.channelCount_; - inputGeometry_=dn.inputGeometry_; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" -#ifdef HAVE_CAFFE - this->net_=dn.net_; -#endif - } - DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn) - { -#ifdef HAVE_CAFFE - this->net_=dn.net_; -#endif - this->setPreprocessor(dn.preprocessor_); - this->inputGeometry_=dn.inputGeometry_; - this->channelCount_=dn.channelCount_; - this->minibatchSz_=dn.minibatchSz_; - this->outputSize_=dn.outputSize_; - this->preprocessor_=dn.preprocessor_; - this->outputGeometry_=dn.outputGeometry_; - return *this; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" - } - - DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) - :minibatchSz_(maxMinibatchSz) - { - - CV_Assert(this->minibatchSz_>0); - CV_Assert(fileExists(modelArchFilename)); - CV_Assert(fileExists(modelWeightsFilename)); - CV_Assert(!preprocessor.empty()); - this->setPreprocessor(preprocessor); -#ifdef HAVE_CAFFE - this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); - CV_Assert(net_->num_inputs()==1); - CV_Assert(net_->num_outputs()==1); - 
CV_Assert(this->net_->input_blobs()[0]->channels()==1 - ||this->net_->input_blobs()[0]->channels()==3); - this->channelCount_=this->net_->input_blobs()[0]->channels(); - - - - this->net_->CopyTrainedLayersFrom(modelWeightsFilename); - - caffe::Blob* inputLayer = this->net_->input_blobs()[0]; - - this->inputGeometry_=Size(inputLayer->width(), inputLayer->height()); - this->channelCount_ = inputLayer->channels(); - - inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); - net_->Reshape(); - this->outputSize_=net_->output_blobs()[0]->channels(); - this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); - - - - - -#else - CV_Error(Error::StsError,"Caffe not available during compilation!"); -#endif - } - - void classify(InputArray image, OutputArray classProbabilities) - { - std::vector inputImageList; - inputImageList.push_back(image.getMat()); - classifyBatch(inputImageList,classProbabilities); - } - - void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) - { - std::vector allImageVector; - inputImageList.getMatVector(allImageVector); - size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic - - size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic - classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); - Mat outputMat = classProbabilities.getMat(); - for(size_t imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); - std::vector::const_iterator from=std::vector::const_iterator(allImageVector.begin()+imgNum); - std::vector::const_iterator to=std::vector::const_iterator(allImageVector.begin()+rangeEnd); - std::vector minibatchInput(from,to); - classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); - - } - - } - - int getOutputSize() - { - return this->outputSize_; - } - Size 
getOutputGeometry() - { - return this->outputGeometry_; - } - - int getMinibatchSize() - { - return this->minibatchSz_; - } - - int getBackend() - { - return OCR_HOLISTIC_BACKEND_CAFFE; - } -}; - -class DeepCNNOpenCvDNNImpl: public DeepCNN{ -protected: - - void classifyMiniBatch(std::vector inputImageList, Mat outputMat) - { - //Classifies a list of images containing at most minibatchSz_ images - CV_Assert(int(inputImageList.size())<=this->minibatchSz_); - CV_Assert(outputMat.isContinuous()); - -#ifdef HAVE_DNN - - std::vector preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class? - - Mat preprocessed; - // preprocesses each image in the inputImageList and push to preprocessedImList - for(size_t imgNum=0;imgNumpreprocess(inputImageList[imgNum],preprocessed); - preProcessedImList.push_back(preprocessed); - } - // set input data blob in dnn::net - net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data"); - - float*outputMatData=(float*)(outputMat.data); - //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ; - Mat outputNet = this->net_->forward(); - outputNet = outputNet.reshape(1, 1); - - float*outputNetData=(float*)(outputNet.data); - - memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size()); - -#endif - } - -#ifdef HAVE_DNN - Ptr net_; -#endif - // hard coding input image size. anything in DNN library to get that from prototxt?? 
- // Size inputGeometry_;//=Size(100,32); - int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst - int outputSize_; - //Size outputGeometry_;//= Size(1,1); - //int channelCount_; - // int inputChannel_ ;//=1; - // int _inputHeight; - //int _inputWidth ; - //int _inputChannel ; -public: - DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): - minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ - channelCount_=dn.channelCount_; - inputGeometry_=dn.inputGeometry_; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" -#ifdef HAVE_DNN - this->net_=dn.net_; -#endif - } - DeepCNNOpenCvDNNImpl& operator=(const DeepCNNOpenCvDNNImpl &dn) - { -#ifdef HAVE_DNN - this->net_=dn.net_; -#endif - this->setPreprocessor(dn.preprocessor_); - this->inputGeometry_=dn.inputGeometry_; - this->channelCount_=dn.channelCount_; - this->minibatchSz_=dn.minibatchSz_; - this->outputSize_=dn.outputSize_; - this->preprocessor_=dn.preprocessor_; - this->outputGeometry_=dn.outputGeometry_; - return *this; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" - } - - DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz,int inputWidth ,int inputHeight ,int inputChannel ) - :minibatchSz_(maxMinibatchSz) - { - - CV_Assert(this->minibatchSz_>0); - CV_Assert(fileExists(modelArchFilename)); - CV_Assert(fileExists(modelWeightsFilename)); - CV_Assert(!preprocessor.empty()); - this->setPreprocessor(preprocessor); -#ifdef HAVE_DNN - - this->net_ = makePtr(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); - - - - if (this->net_.empty()) - { - std::cerr << "Can't load network by using the following files: " << std::endl; - std::cerr << "prototxt: " << modelArchFilename << std::endl; - std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; - //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << 
std::endl; - //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; - exit(-1); - } - - - this->inputGeometry_=Size(inputWidth,inputHeight);// Size(inputLayer->width(), inputLayer->height()); - this->channelCount_ = inputChannel;//inputLayer->channels(); - - //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); - Ptr< Layer > outLayer= net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2])); - //std::vector blobs = outLayer->blobs; - - this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels(); - //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); - - - - - - -#else - CV_Error(Error::StsError,"DNN module not available during compilation!"); -#endif - } - - void classify(InputArray image, OutputArray classProbabilities) - { - std::vector inputImageList; - inputImageList.push_back(image.getMat()); - classifyBatch(inputImageList,classProbabilities); - } - - void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) - { - std::vector allImageVector; - inputImageList.getMatVector(allImageVector); - size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic - - size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic - classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); - Mat outputMat = classProbabilities.getMat(); - - for(size_t imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); - std::vector::const_iterator from=std::vector::const_iterator(allImageVector.begin()+imgNum); - std::vector::const_iterator to=std::vector::const_iterator(allImageVector.begin()+rangeEnd); - std::vector minibatchInput(from,to); - classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); - - } - - } 
- - int getOutputSize() - { - return this->outputSize_; - } - Size getOutputGeometry() - { - return this->outputGeometry_; - } - - int getMinibatchSize() - { - return this->minibatchSz_; - } - - int getBackend() - { - return OCR_HOLISTIC_BACKEND_DNN; - } -}; - -Ptr DeepCNN::create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz,int backEnd) -{ - if(preprocessor.empty()) - { - preprocessor=ImagePreprocessor::createResizer(); - } - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_DEFAULT: - -#ifdef HAVE_CAFFE - return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); - -#elif defined(HAVE_DNN) - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1)); -#else - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); -#endif - break; - - case OCR_HOLISTIC_BACKEND_CAFFE: - return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); - break; - case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1)); - break; - case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); - break; - } -} - - -Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd) -{ - Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_DEFAULT: - -#ifdef HAVE_CAFFE - return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); - -#elif defined(HAVE_DNN) - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1)); -#else - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); -#endif - break; - - case OCR_HOLISTIC_BACKEND_CAFFE: - return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); - break; - 
case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1)); - break; - case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); - break; - } -} - -namespace cnn_config{ -std::vector getAvailableBackends() -{ - std::vector backends; - -#ifdef HAVE_CAFFE - backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // dnn backend opencv_dnn - -#endif -#ifdef HAVE_DNN - backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn based backend" -#endif - return backends; - - -} - -namespace caffe_backend{ - -#ifdef HAVE_CAFFE - -bool getCaffeGpuMode() -{ - return caffe::Caffe::mode()==caffe::Caffe::GPU; -} - -void setCaffeGpuMode(bool useGpu) -{ - if(useGpu) - { - caffe::Caffe::set_mode(caffe::Caffe::GPU); - }else - { - caffe::Caffe::set_mode(caffe::Caffe::CPU); - } -} - -bool getCaffeAvailable() -{ - return true; -} -#else - -bool getCaffeGpuMode() -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - return 0; -} - -void setCaffeGpuMode(bool useGpu) -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - CV_Assert(useGpu==1);//Compilation directives force -} - -bool getCaffeAvailable(){ - return 0; -} - -#endif - -}//namespace caffe -namespace dnn_backend{ -#ifdef HAVE_DNN - - -bool getDNNAvailable(){ - return true; -} -#else -bool getDNNAvailable(){ - return 0; -} -#endif -}//namspace dnn_backend -}//namespace cnn_config - -class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ -private: - struct NetOutput{ - //Auxiliary structure that handles the logic of getting class ids and probabillities from - //the raw outputs of caffe - int wordIdx; - float probabillity; - - static bool sorter(const NetOutput& o1,const NetOutput& o2) - {//used with std::sort to provide the most probable class - return o1.probabillity>o2.probabillity; - } - - static void getOutputs(const 
float* buffer,int nbOutputs,std::vector& res) - { - res.resize(nbOutputs); - for(int k=0;k tmp; - getOutputs(buffer,nbOutputs,tmp); - classNum=tmp[0].wordIdx; - confidence=tmp[0].probabillity; - - } - }; -protected: - std::vector labels_; - Ptr classifier_; -public: - OCRHolisticWordRecognizerImpl(Ptr classifierPtr,String vocabularyFilename):classifier_(classifierPtr) - { - CV_Assert(fileExists(vocabularyFilename));//this fails for some rason - std::ifstream labelsFile(vocabularyFilename.c_str()); - if(!labelsFile) - { - CV_Error(Error::StsError,"Could not read Labels from file"); - } - std::string line; - while (std::getline(labelsFile, line)) - { - labels_.push_back(std::string(line)); - } - CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); - } - - OCRHolisticWordRecognizerImpl(Ptr classifierPtr,const std::vector& vocabulary):classifier_(classifierPtr) - { - this->labels_=vocabulary; - CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); - } - - void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence) - { - Mat netOutput; - this->classifier_->classify(inputImage,netOutput); - int classNum; - NetOutput::getClassification((float*)(netOutput.data),this->classifier_->getOutputSize(),classNum,confidence); - transcription=this->labels_[classNum]; - } - - void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector& transcriptionVec,CV_OUT std::vector& confidenceVec) - { - Mat netOutput; - this->classifier_->classifyBatch(inputImageList,netOutput); - - for(int k=0;kclassifier_->getOutputSize(),classNum,confidence); - transcriptionVec.push_back(this->labels_[classNum]); - confidenceVec.push_back(confidence); - } - } - - - void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=0) - { - CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not 
applicable for word spotting - double confidence; - String transcription; - recogniseImage(image,transcription,confidence); - output_text=transcription.c_str(); - if(component_rects!=NULL) - { - component_rects->resize(1); - (*component_rects)[0]=Rect(0,0,image.size().width,image.size().height); - } - if(component_texts!=NULL) - { - component_texts->resize(1); - (*component_texts)[0]=transcription.c_str(); - } - if(component_confidences!=NULL) - { - component_confidences->resize(1); - (*component_confidences)[0]=float(confidence); - } - } - - void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=0) - { - CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image - this->run(image,output_text,component_rects,component_texts,component_confidences,component_level); - } - - std::vector& getVocabulary() - { - return this->labels_; - } - - Ptr getClassifier() - { - return this->classifier_; - } -}; - -Ptr OCRHolisticWordRecognizer::create(Ptr classifierPtr,String vocabularyFilename ) -{ - return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename)); -} - -Ptr OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename) -{ - Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); - Ptr classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100)); - return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename)); -} - -Ptr OCRHolisticWordRecognizer::create(Ptr classifierPtr,const std::vector& vocabulary) -{ - return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary)); -} - -Ptr OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename,const std::vector& vocabulary){ - Ptr 
preprocessor=ImagePreprocessor::createImageStandarizer(113); - Ptr classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100)); - return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary)); -} - - - - - -} } //namespace text namespace cv diff --git a/modules/text/src/precomp.hpp b/modules/text/src/precomp.hpp index e85e4eb85cb..7ccda150f37 100644 --- a/modules/text/src/precomp.hpp +++ b/modules/text/src/precomp.hpp @@ -45,6 +45,8 @@ #include "opencv2/text.hpp" +#include "text_config.hpp" + #ifdef HAVE_TESSERACT #if !defined(USE_STD_NAMESPACE) #define USE_STD_NAMESPACE diff --git a/modules/text/src/text_detector.cpp b/modules/text/src/text_detector.cpp deleted file mode 100644 index 949f5f86dc4..00000000000 --- a/modules/text/src/text_detector.cpp +++ /dev/null @@ -1,169 +0,0 @@ -#include "precomp.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/core.hpp" - - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -//#ifdef HAVE_CAFFE -//#include "caffe/caffe.hpp" -//#endif - -namespace cv { namespace text { - - - - -class textDetectImpl: public textDetector{ -private: - struct NetOutput{ - //Auxiliary structure that handles the logic of getting bounding box and confidences of textness from - //the raw outputs of caffe - Rect bbox; - float probability; - - - static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector& res,Size inputShape) - { - - res.resize(nbrTextBoxes); - for(int k=0;k inputShape.width?inputShape.width-1:x_max; - y_max = y_max > inputShape.height?inputShape.height-1:y_max; - float wd = x_max-x_min+1; - float ht = y_max-y_min+1; - - res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht)); - - res[k].probability=buffer[k*nCol+2]; - } - - } - - - }; -protected: - - Ptr classifier_; -public: - textDetectImpl(Ptr classifierPtr):classifier_(classifierPtr) - { - - } - - - - void textDetectInImage(InputArray inputImage,CV_OUT 
std::vector& Bbox,CV_OUT std::vector& confidence) - { - Mat netOutput; - // call the detect function of deepTextCNN class - this->classifier_->detect(inputImage,netOutput); - // get the output geometry i.e height and width of output blob from caffe - Size OutputGeometry_ = this->classifier_->getOutputGeometry(); - int nbrTextBoxes = OutputGeometry_.height; - int nCol = OutputGeometry_.width; - - std::vector tmp; - // the output bounding box needs to be resized by the input height and width - Size inputImageShape = Size(inputImage.cols(),inputImage.rows()); - NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape); - // put the output in CV_OUT - - for (int k=0;k* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=0) - { - CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting - - std::vector bbox; - std::vector score; - textDetectInImage(image,bbox,score); - - if(component_rects!=NULL) - { - component_rects->resize(bbox.size()); // should be a user behavior - - component_rects = &bbox; - } - - if(component_confidences!=NULL) - { - component_confidences->resize(score.size()); // shoub be a user behavior - - component_confidences = &score; - } - } - - void run(Mat& image, Mat& mask, std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=0) - { - CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image - this->run(image,component_rects,component_confidences,component_level); - } - - - - Ptr getClassifier() - { - return this->classifier_; - } -}; - -Ptr textDetector::create(Ptr classifierPtr) -{ - return Ptr(new textDetectImpl(classifierPtr)); -} - -Ptr textDetector::create(String modelArchFilename, String modelWeightsFilename) -{ - -// create a custom preprocessor with rawval - Ptr preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); -// set the mean 
for the preprocessor - - Mat textbox_mean(1,3,CV_8U); - textbox_mean.at(0,0)=104; - textbox_mean.at(0,1)=117; - textbox_mean.at(0,2)=123; - preprocessor->set_mean(textbox_mean); -// create a pointer to text box detector(textDetector) - Ptr classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1)); - return Ptr(new textDetectImpl(classifierPtr)); -} - - - - - - - -} } //namespace text namespace cv diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 5267b390fed..1c3933fda47 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -1,453 +1,101 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + #include "precomp.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/core.hpp" - - -#include #include -#include -#include #include -#include -#include -#include -#include -#include - - -#ifdef HAVE_CAFFE -#include "caffe/caffe.hpp" -#endif -#ifdef HAVE_DNN #include "opencv2/dnn.hpp" -#endif using namespace cv::dnn; -#define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__) - -namespace cv { namespace text { - -inline bool fileExists (String filename) { - std::ifstream f(filename.c_str()); - return f.good(); -} - -class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ -protected: - - - void process_(Mat inputImage, Mat &outputMat) - { - // do forward pass and stores the output in outputMat - CV_Assert(outputMat.isContinuous()); - if (inputImage.channels() != this->inputChannelCount_) - CV_WARN("Number of input channel(s) in the model is not same as input"); - - -#ifdef HAVE_CAFFE - net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width); - net_->Reshape(); - float* 
inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); - float* inputData=inputBuffer; - - std::vector input_channels; - Mat preprocessed; - // if the image have multiple color channels the input layer should be populated accordingly - for (int channel=0;channel < this->inputChannelCount_;channel++){ - - cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); - input_channels.push_back(netInputWraped); - //input_data += width * height; - inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); - } - this->preprocess(inputImage,preprocessed); - split(preprocessed, input_channels); - - //preprocessed.copyTo(netInputWraped); - - - this->net_->Forward(); - const float* outputNetData=net_->output_blobs()[0]->cpu_data(); - // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); - - - - - this->outputGeometry_.height = net_->output_blobs()[0]->height(); - this->outputGeometry_.width = net_->output_blobs()[0]->width(); - this->outputChannelCount_ = net_->output_blobs()[0]->channels(); - int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; - outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); - float*outputMatData=(float*)(outputMat.data); - - memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); - - - -#endif - } - - -#ifdef HAVE_CAFFE - Ptr > net_; -#endif - //Size inputGeometry_; - int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst - //int outputSize_; -public: - DeepCNNTextDetectorCaffeImpl(const DeepCNNTextDetectorCaffeImpl& dn): - minibatchSz_(dn.minibatchSz_){ - outputGeometry_=dn.outputGeometry_; - inputGeometry_=dn.inputGeometry_; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" -#ifdef HAVE_CAFFE - this->net_=dn.net_; -#endif - } - DeepCNNTextDetectorCaffeImpl& operator=(const DeepCNNTextDetectorCaffeImpl &dn) - { -#ifdef 
HAVE_CAFFE - this->net_=dn.net_; -#endif - this->setPreprocessor(dn.preprocessor_); - this->inputGeometry_=dn.inputGeometry_; - this->inputChannelCount_=dn.inputChannelCount_; - this->outputChannelCount_ = dn.outputChannelCount_; - // this->minibatchSz_=dn.minibatchSz_; - //this->outputGeometry_=dn.outputSize_; - this->preprocessor_=dn.preprocessor_; - this->outputGeometry_=dn.outputGeometry_; - return *this; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" - } - - DeepCNNTextDetectorCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) - :minibatchSz_(maxMinibatchSz) - { - - CV_Assert(this->minibatchSz_>0); - CV_Assert(fileExists(modelArchFilename)); - CV_Assert(fileExists(modelWeightsFilename)); - CV_Assert(!preprocessor.empty()); - this->setPreprocessor(preprocessor); -#ifdef HAVE_CAFFE - this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); - CV_Assert(net_->num_inputs()==1); - CV_Assert(net_->num_outputs()==1); - CV_Assert(this->net_->input_blobs()[0]->channels()==1 - ||this->net_->input_blobs()[0]->channels()==3); - // this->channelCount_=this->net_->input_blobs()[0]->channels(); - - - - this->net_->CopyTrainedLayersFrom(modelWeightsFilename); - - caffe::Blob* inputLayer = this->net_->input_blobs()[0]; - - this->inputGeometry_.height = inputLayer->height(); - this->inputGeometry_.width = inputLayer->width(); - this->inputChannelCount_ = inputLayer->channels(); - //this->inputGeometry_.batchSize =1; - - inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width); - net_->Reshape(); - this->outputChannelCount_ = net_->output_blobs()[0]->channels(); - //this->outputGeometry_.batchSize =1; - this->outputGeometry_.height =net_->output_blobs()[0]->height(); - this->outputGeometry_.width = net_->output_blobs()[0]->width(); - -#else - CV_Error(Error::StsError,"Caffe not available during compilation!"); 
-#endif - } - - - void detect(InputArray image, OutputArray Bbox_prob) - { - Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); - Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed - Mat outputMat = Bbox_prob.getMat(); - process_(image.getMat(),outputMat); - //copy back to outputArray - outputMat.copyTo(Bbox_prob); - } - - Size getOutputGeometry() - { - return this->outputGeometry_; - } - Size getinputGeometry() - { - return this->inputGeometry_; - } - - int getMinibatchSize() - { - return this->minibatchSz_; - } - - int getBackend() - { - return OCR_HOLISTIC_BACKEND_CAFFE; - } - void setPreprocessor(Ptr ptr) - { - CV_Assert(!ptr.empty()); - preprocessor_=ptr; - } - - Ptr getPreprocessor() - { - return preprocessor_; - } -}; - +namespace cv +{ +namespace text +{ -class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ +class TextDetectorCNNImpl : public TextDetectorCNN +{ protected: + Net net_; + std::vector sizes_; + int inputChannelCount_; + bool detectMultiscale_; - void process_(Mat inputImage, Mat &outputMat) + void getOutputs(const float* buffer,int nbrTextBoxes,int nCol, + std::vector& Bbox, std::vector& confidence, Size inputShape) { - // do forward pass and stores the output in outputMat - CV_Assert(outputMat.isContinuous()); - if (inputImage.channels() != this->inputChannelCount_) - CV_WARN("Number of input channel(s) in the model is not same as input"); - - -#ifdef HAVE_DNN - - Mat preprocessed; - this->preprocess(inputImage,preprocessed); - - net_->setInput(blobFromImage(preprocessed,1, this->inputGeometry_), "data"); - - Mat outputNet = this->net_->forward( ); - - this->outputGeometry_.height = outputNet.size[2]; - this->outputGeometry_.width = outputNet.size[3]; - this->outputChannelCount_ = outputNet.size[1]; + for(int k = 0; k < nbrTextBoxes; k++) + { + float x_min = buffer[k*nCol + 3]*inputShape.width; + float y_min = buffer[k*nCol + 4]*inputShape.height; - 
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); - float*outputMatData=(float*)(outputMat.data); - float*outputNetData=(float*)(outputNet.data); - int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; + float x_max = buffer[k*nCol + 5]*inputShape.width; + float y_max = buffer[k*nCol + 6]*inputShape.height; - memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); + CV_Assert(x_min < x_max, y_min < y_max); + x_min = std::max(0.f, x_min); + y_min = std::max(0.f, y_min); + x_max = std::min(inputShape.width - 1.f, x_max); + y_max = std::min(inputShape.height - 1.f, y_max); + int wd = cvRound(x_max - x_min); + int ht = cvRound(y_max - y_min); -#endif + Bbox.push_back(Rect(cvRound(x_min), cvRound(y_min), wd, ht)); + confidence.push_back(buffer[k*nCol + 2]); + } } - - -#ifdef HAVE_DNN - Ptr net_; -#endif - //Size inputGeometry_; - int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst - //int outputSize_; - //int inputHeight_; - //int inputWidth_; - //int inputChannel_; public: - DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn): - minibatchSz_(dn.minibatchSz_){ - outputGeometry_=dn.outputGeometry_; - inputGeometry_=dn.inputGeometry_; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" -#ifdef HAVE_DNN - this->net_=dn.net_; -#endif - } - DeepCNNTextDetectorDNNImpl& operator=(const DeepCNNTextDetectorDNNImpl &dn) - { -#ifdef HAVE_DNN - this->net_=dn.net_; -#endif - this->setPreprocessor(dn.preprocessor_); - this->inputGeometry_=dn.inputGeometry_; - this->inputChannelCount_=dn.inputChannelCount_; - this->outputChannelCount_ = dn.outputChannelCount_; - // this->minibatchSz_=dn.minibatchSz_; - //this->outputGeometry_=dn.outputSize_; - this->preprocessor_=dn.preprocessor_; - this->outputGeometry_=dn.outputGeometry_; - return *this; - //Implemented to supress Visual Studio warning "assignment 
operator could not be generated" - } - - DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz,int inputHeight=700,int inputWidth =700,int inputChannel =3) - :minibatchSz_(maxMinibatchSz) + TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale) : + detectMultiscale_(detectMultiscale) { + net_ = readNetFromCaffe(modelArchFilename, modelWeightsFilename); + CV_Assert(!net_.empty()); + inputChannelCount_ = 3; + sizes_.push_back(Size(700, 700)); - CV_Assert(this->minibatchSz_>0); - CV_Assert(fileExists(modelArchFilename)); - CV_Assert(fileExists(modelWeightsFilename)); - CV_Assert(!preprocessor.empty()); - this->setPreprocessor(preprocessor); -#ifdef HAVE_DNN - this->net_ = makePtr(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); - - if (this->net_.empty()) + if(detectMultiscale_) { - std::cerr << "Can't load network by using the following files: " << std::endl; - std::cerr << "prototxt: " << modelArchFilename << std::endl; - std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; - //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl; - //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; - exit(-1); + sizes_.push_back(Size(300, 300)); + sizes_.push_back(Size(700,500)); + sizes_.push_back(Size(700,300)); + sizes_.push_back(Size(1600,1600)); } - - this->inputGeometry_.height =inputHeight; - this->inputGeometry_.width = inputWidth ;//inputLayer->width(); - this->inputChannelCount_ = inputChannel ;//inputLayer->channels(); - -#else - CV_Error(Error::StsError,"DNN module not available during compilation!"); -#endif } - - void detect(InputArray image, OutputArray Bbox_prob) + void textDetectInImage(InputArray inputImage_, std::vector& Bbox, std::vector& confidence) { - Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); - 
Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed - Mat outputMat = Bbox_prob.getMat(); + CV_Assert(inputImage_.channels() == inputChannelCount_); + Mat inputImage = inputImage_.getMat().clone(); + Bbox.resize(0); + confidence.resize(0); - process_(image.getMat(),outputMat); - //copy back to outputArray - outputMat.copyTo(Bbox_prob); - } - - Size getOutputGeometry() - { - return this->outputGeometry_; - } - Size getinputGeometry() - { - return this->inputGeometry_; - } - - int getMinibatchSize() - { - return this->minibatchSz_; - } - - int getBackend() - { - return OCR_HOLISTIC_BACKEND_DNN; - } - void setPreprocessor(Ptr ptr) - { - CV_Assert(!ptr.empty()); - preprocessor_=ptr; - } - - Ptr getPreprocessor() - { - return preprocessor_; - } + for(size_t i = 0; i < sizes_.size(); i++) + { + Size inputGeometry = sizes_[i]; + net_.setInput(blobFromImage(inputImage, 1, inputGeometry, Scalar(123, 117, 104)), "data"); + Mat outputNet = net_.forward(); + int nbrTextBoxes = outputNet.size[2]; + int nCol = outputNet.size[3]; + int outputChannelCount = outputNet.size[1]; + CV_Assert(outputChannelCount == 1); + getOutputs((float*)(outputNet.data), nbrTextBoxes, nCol, Bbox, confidence, inputImage.size()); + } + } }; -Ptr DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz,int backEnd) +Ptr TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, bool detectMultiscale) { - if(preprocessor.empty()) - { - // create a custom preprocessor with rawval - preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); - // set the mean for the preprocessor - - Mat textbox_mean(1,3,CV_8U); - textbox_mean.at(0,0)=104; - textbox_mean.at(0,1)=117; - textbox_mean.at(0,2)=123; - preprocessor->set_mean(textbox_mean); - } - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_DEFAULT: - -#ifdef HAVE_CAFFE - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, 
weightsFilename,preprocessor, minibatchSz)); - -#elif defined(HAVE_DNN) - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); -#else - CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); - return Ptr(); -#endif - case OCR_HOLISTIC_BACKEND_CAFFE: - - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); - break; - - case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); - break; - - case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); - return Ptr(); - break; - } - //return Ptr(); - + return makePtr(modelArchFilename, modelWeightsFilename, detectMultiscale); } - - -Ptr DeepCNNTextDetector::createTextBoxNet(String archFilename,String weightsFilename,int backEnd) -{ - - // create a custom preprocessor with rawval - Ptr preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); - // set the mean for the preprocessor - - Mat textbox_mean(1,3,CV_8U); - textbox_mean.at(0,0)=104; - textbox_mean.at(0,1)=117; - textbox_mean.at(0,2)=123; - preprocessor->set_mean(textbox_mean); - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_DEFAULT: - -#ifdef HAVE_CAFFE - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); - -#elif defined(HAVE_DNN) - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); -#else - CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); - return Ptr(); -#endif - break; - case OCR_HOLISTIC_BACKEND_CAFFE: - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); - break; - case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); - break; - 
case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); - return Ptr(); - break; - } - //return Ptr(); - -} - -void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output) -{ - Size inputHtWd = Size(this->inputGeometry_.height,this->inputGeometry_.width); - this->preprocessor_->preprocess(input,output,inputHtWd,this->inputChannelCount_); -} - - - -} } //namespace text namespace cv +} //namespace text +} //namespace cv diff --git a/modules/text/text_config.hpp.in b/modules/text/text_config.hpp.in index 81e624bab37..ec5120a4160 100644 --- a/modules/text/text_config.hpp.in +++ b/modules/text/text_config.hpp.in @@ -1,4 +1,7 @@ #ifndef __OPENCV_TEXT_CONFIG_HPP__ #define __OPENCV_TEXT_CONFIG_HPP__ +// HAVE OCR Tesseract +#cmakedefine HAVE_TESSERACT + #endif From 1306621f3d17f695565ff5cc39ecee953ca93ee0 Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Tue, 10 Oct 2017 15:29:20 +0300 Subject: [PATCH 26/31] text: add prototxt for text detection model --- modules/text/samples/textbox.prototxt | 1605 +++++++++++++++++++++++++ 1 file changed, 1605 insertions(+) create mode 100644 modules/text/samples/textbox.prototxt diff --git a/modules/text/samples/textbox.prototxt b/modules/text/samples/textbox.prototxt new file mode 100644 index 00000000000..6e8cb688ef4 --- /dev/null +++ b/modules/text/samples/textbox.prototxt @@ -0,0 +1,1605 @@ +name: "VGG_text_longer_conv_300x300_deploy" +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 700 + dim: 700 +} +layer { + name: "conv1_1" + type: "Convolution" + bottom: "data" + top: "conv1_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: 
"Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1" + top: "conv2_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2_2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + 
param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "conv3_3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + 
bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + name: "pool4" + type: "Pooling" + bottom: "conv4_3" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + name: "conv5_3" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_3" + type: "ReLU" + bottom: "conv5_3" + top: "conv5_3" +} +layer { + name: "pool5" + type: "Pooling" + bottom: "conv5_3" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "fc6" + type: "Convolution" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + pad: 6 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + 
dilation: 6 + } +} +layer { + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + name: "fc7" + type: "Convolution" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + kernel_size: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + name: "conv6_1" + type: "Convolution" + bottom: "fc7" + top: "conv6_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_1_relu" + type: "ReLU" + bottom: "conv6_1" + top: "conv6_1" +} +layer { + name: "conv6_2" + type: "Convolution" + bottom: "conv6_1" + top: "conv6_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_relu" + type: "ReLU" + bottom: "conv6_2" + top: "conv6_2" +} +layer { + name: "conv7_1" + type: "Convolution" + bottom: "conv6_2" + top: "conv7_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_1_relu" + type: "ReLU" + bottom: "conv7_1" + top: "conv7_1" +} +layer { + name: "conv7_2" + type: "Convolution" + bottom: "conv7_1" + top: "conv7_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + 
kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_relu" + type: "ReLU" + bottom: "conv7_2" + top: "conv7_2" +} +layer { + name: "conv8_1" + type: "Convolution" + bottom: "conv7_2" + top: "conv8_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_1_relu" + type: "ReLU" + bottom: "conv8_1" + top: "conv8_1" +} +layer { + name: "conv8_2" + type: "Convolution" + bottom: "conv8_1" + top: "conv8_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_relu" + type: "ReLU" + bottom: "conv8_2" + top: "conv8_2" +} +layer { + name: "pool6" + type: "Pooling" + bottom: "conv8_2" + top: "pool6" + pooling_param { + pool: AVE + global_pooling: true + } +} +layer { + name: "conv4_3_norm" + type: "Normalize" + bottom: "conv4_3" + top: "conv4_3_norm" + norm_param { + across_spatial: false + scale_filler { + type: "constant" + value: 20 + } + channel_shared: false + } +} +layer { + name: "conv4_3_norm_mbox_loc" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 48 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "conv4_3_norm_mbox_loc_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_loc" + top: "conv4_3_norm_mbox_loc_perm" + permute_param { 
+ order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_loc_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_loc_perm" + top: "conv4_3_norm_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_conf" + top: "conv4_3_norm_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_conf_perm" + top: "conv4_3_norm_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_priorbox" + type: "PriorBox" + bottom: "conv4_3_norm" + bottom: "data" + top: "conv4_3_norm_mbox_priorbox" + prior_box_param { + min_size: 30.0 + aspect_ratio: 2 + aspect_ratio: 3 + aspect_ratio: 5 + aspect_ratio: 7 + aspect_ratio: 10 + flip: false + clip: true + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + } +} +layer { + name: "fc7_mbox_loc" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 56 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "fc7_mbox_loc_perm" + type: "Permute" + bottom: "fc7_mbox_loc" + top: "fc7_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: 
"fc7_mbox_loc_flat" + type: "Flatten" + bottom: "fc7_mbox_loc_perm" + top: "fc7_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_conf" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 28 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "fc7_mbox_conf_perm" + type: "Permute" + bottom: "fc7_mbox_conf" + top: "fc7_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_conf_flat" + type: "Flatten" + bottom: "fc7_mbox_conf_perm" + top: "fc7_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_priorbox" + type: "PriorBox" + bottom: "fc7" + bottom: "data" + top: "fc7_mbox_priorbox" + prior_box_param { + min_size: 60.0 + max_size: 114.0 + aspect_ratio: 2 + aspect_ratio: 3 + aspect_ratio: 5 + aspect_ratio: 7 + aspect_ratio: 10 + flip: false + clip: true + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + } +} +layer { + name: "conv6_2_mbox_loc" + type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 56 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "conv6_2_mbox_loc_perm" + type: "Permute" + bottom: "conv6_2_mbox_loc" + top: "conv6_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv6_2_mbox_loc_perm" + top: "conv6_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_conf" 
+ type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 28 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "conv6_2_mbox_conf_perm" + type: "Permute" + bottom: "conv6_2_mbox_conf" + top: "conv6_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv6_2_mbox_conf_perm" + top: "conv6_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv6_2" + bottom: "data" + top: "conv6_2_mbox_priorbox" + prior_box_param { + min_size: 114.0 + max_size: 168.0 + aspect_ratio: 2 + aspect_ratio: 3 + aspect_ratio: 5 + aspect_ratio: 7 + aspect_ratio: 10 + flip: false + clip: true + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + } +} +layer { + name: "conv7_2_mbox_loc" + type: "Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 56 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "conv7_2_mbox_loc_perm" + type: "Permute" + bottom: "conv7_2_mbox_loc" + top: "conv7_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv7_2_mbox_loc_perm" + top: "conv7_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_conf" + type: "Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + 
param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 28 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "conv7_2_mbox_conf_perm" + type: "Permute" + bottom: "conv7_2_mbox_conf" + top: "conv7_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv7_2_mbox_conf_perm" + top: "conv7_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv7_2" + bottom: "data" + top: "conv7_2_mbox_priorbox" + prior_box_param { + min_size: 168.0 + max_size: 222.0 + aspect_ratio: 2 + aspect_ratio: 3 + aspect_ratio: 5 + aspect_ratio: 7 + aspect_ratio: 10 + flip: false + clip: true + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + } +} +layer { + name: "conv8_2_mbox_loc" + type: "Convolution" + bottom: "conv8_2" + top: "conv8_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 56 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "conv8_2_mbox_loc_perm" + type: "Permute" + bottom: "conv8_2_mbox_loc" + top: "conv8_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv8_2_mbox_loc_perm" + top: "conv8_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_conf" + type: "Convolution" + bottom: "conv8_2" + top: "conv8_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 28 + weight_filler { + type: "xavier" + } 
+ bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "conv8_2_mbox_conf_perm" + type: "Permute" + bottom: "conv8_2_mbox_conf" + top: "conv8_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv8_2_mbox_conf_perm" + top: "conv8_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv8_2" + bottom: "data" + top: "conv8_2_mbox_priorbox" + prior_box_param { + min_size: 222.0 + max_size: 276.0 + aspect_ratio: 2 + aspect_ratio: 3 + aspect_ratio: 5 + aspect_ratio: 7 + aspect_ratio: 10 + flip: false + clip: true + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + } +} +layer { + name: "pool6_mbox_loc" + type: "Convolution" + bottom: "pool6" + top: "pool6_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 56 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer { + name: "pool6_mbox_loc_perm" + type: "Permute" + bottom: "pool6_mbox_loc" + top: "pool6_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "pool6_mbox_loc_flat" + type: "Flatten" + bottom: "pool6_mbox_loc_perm" + top: "pool6_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "pool6_mbox_conf" + type: "Convolution" + bottom: "pool6" + top: "pool6_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 28 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + pad_h: 0 + pad_w: 2 + kernel_h: 1 + kernel_w: 5 + stride_h: 1 + stride_w: 1 + } +} +layer 
{ + name: "pool6_mbox_conf_perm" + type: "Permute" + bottom: "pool6_mbox_conf" + top: "pool6_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "pool6_mbox_conf_flat" + type: "Flatten" + bottom: "pool6_mbox_conf_perm" + top: "pool6_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "pool6_mbox_priorbox" + type: "PriorBox" + bottom: "pool6" + bottom: "data" + top: "pool6_mbox_priorbox" + prior_box_param { + min_size: 276.0 + max_size: 330.0 + aspect_ratio: 2 + aspect_ratio: 3 + aspect_ratio: 5 + aspect_ratio: 7 + aspect_ratio: 10 + flip: false + clip: true + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + } +} +layer { + name: "mbox_loc" + type: "Concat" + bottom: "conv4_3_norm_mbox_loc_flat" + bottom: "fc7_mbox_loc_flat" + bottom: "conv6_2_mbox_loc_flat" + bottom: "conv7_2_mbox_loc_flat" + bottom: "conv8_2_mbox_loc_flat" + bottom: "pool6_mbox_loc_flat" + top: "mbox_loc" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_conf" + type: "Concat" + bottom: "conv4_3_norm_mbox_conf_flat" + bottom: "fc7_mbox_conf_flat" + bottom: "conv6_2_mbox_conf_flat" + bottom: "conv7_2_mbox_conf_flat" + bottom: "conv8_2_mbox_conf_flat" + bottom: "pool6_mbox_conf_flat" + top: "mbox_conf" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv4_3_norm_mbox_priorbox" + bottom: "fc7_mbox_priorbox" + bottom: "conv6_2_mbox_priorbox" + bottom: "conv7_2_mbox_priorbox" + bottom: "conv8_2_mbox_priorbox" + bottom: "pool6_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } +} +layer { + name: "mbox_conf_reshape" + type: "Reshape" + bottom: "mbox_conf" + top: "mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: -1 + dim: 2 + } + } +} +layer { + name: "mbox_conf_softmax" + type: "Softmax" + bottom: "mbox_conf_reshape" + top: "mbox_conf_softmax" + softmax_param { + axis: 2 + } +} +layer { + name: "mbox_conf_flatten" + type: "Flatten" + 
bottom: "mbox_conf_softmax" + top: "mbox_conf_flatten" + flatten_param { + axis: 1 + } +} +layer { + name: "detection_out" + type: "DetectionOutput" + bottom: "mbox_loc" + bottom: "mbox_conf_flatten" + bottom: "mbox_priorbox" + top: "detection_out" + include { + phase: TEST + } + detection_output_param { + num_classes: 2 + share_location: true + background_label_id: 0 + nms_param { + nms_threshold: 0.45 + top_k: 400 + } + code_type: CENTER_SIZE + keep_top_k: 200 + confidence_threshold: 0.01 + } +} From 3253fe9f7ef4abe0ffa7f1eb0a800c23e2c26978 Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Tue, 10 Oct 2017 16:08:35 +0300 Subject: [PATCH 27/31] text: impovements in samples and module interface --- modules/text/doc/text.bib | 12 +++++++++++- .../text/include/opencv2/text/textDetector.hpp | 12 ++++++++---- modules/text/samples/deeptextdetection.py | 8 ++++---- modules/text/samples/textbox.prototxt | 6 ++++++ modules/text/samples/textbox_demo.cpp | 17 +++++++++-------- modules/text/src/text_detectorCNN.cpp | 2 +- 6 files changed, 39 insertions(+), 18 deletions(-) diff --git a/modules/text/doc/text.bib b/modules/text/doc/text.bib index 64a8f4a197a..d2ed9f9b6d8 100644 --- a/modules/text/doc/text.bib +++ b/modules/text/doc/text.bib @@ -31,4 +31,14 @@ @article{Gomez14 journal = {CoRR}, volume = {abs/1407.7504}, year = {2014}, -} \ No newline at end of file +} +@inproceedings{LiaoSBWL17, + author = {Minghui Liao and + Baoguang Shi and + Xiang Bai and + Xinggang Wang and + Wenyu Liu}, + title = {TextBoxes: {A} Fast Text Detector with a Single Deep Neural Network}, + booktitle = {AAAI}, + year = {2017} +} diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index 0e51df39f4c..9c780ae31e4 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -27,12 +27,16 @@ class CV_EXPORTS_W TextDetector @param Bbox a vector of Rect that will store the 
detected word bounding box @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box */ - virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; + CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; virtual ~TextDetector() {} }; /** @brief TextDetectorCNN class provides the functionallity of text bounding box detection. - * A TextDetectorCNN is employed to find bounding boxes of text words given an input image. + This class is representing to find bounding boxes of text words given an input image. + This class uses OpenCV dnn module to load pre-trained model described in @cite LiaoSBWL17. + The original repository with the modified SSD Caffe version: https://github.com/MhLiao/TextBoxes. + Model can be downloaded from [DropBox](https://www.dropbox.com/s/g8pjzv2de9gty8g/TextBoxes_icdar13.caffemodel?dl=0). + Modified .prototxt file with the model description can be found in `opencv_contrib/modules/text/samples/textbox.prototxt`. */ class CV_EXPORTS_W TextDetectorCNN : public TextDetector { @@ -44,9 +48,9 @@ class CV_EXPORTS_W TextDetectorCNN : public TextDetector @param Bbox a vector of Rect that will store the detected word bounding box @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box */ - CV_WRAP virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; + CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; - /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier. + /** @brief Creates an instance of the TextDetectorCNN class using the provided parameters. 
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. diff --git a/modules/text/samples/deeptextdetection.py b/modules/text/samples/deeptextdetection.py index 09dcb24927d..256a28e9eba 100644 --- a/modules/text/samples/deeptextdetection.py +++ b/modules/text/samples/deeptextdetection.py @@ -14,14 +14,14 @@ def main(): print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') quit() - if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): + if not os.path.isfile('TextBoxes_icdar13.caffemodel') or not os.path.isfile('textbox.prototxt'): print " Model files not found in current directory. Aborting" - print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" + print " See the documentation of text::TextDetectorCNN class to get download links." 
quit() img = cv2.imread(str(sys.argv[1])) - textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel") - rects, outProbs = textSpotter.textDetectInImage(img); + textSpotter = cv2.text.TextDetectorCNN_create("textbox.prototxt", "TextBoxes_icdar13.caffemodel") + rects, outProbs = textSpotter.detect(img); vis = img.copy() thres = 0.6 diff --git a/modules/text/samples/textbox.prototxt b/modules/text/samples/textbox.prototxt index 6e8cb688ef4..bb80198281d 100644 --- a/modules/text/samples/textbox.prototxt +++ b/modules/text/samples/textbox.prototxt @@ -885,6 +885,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1009,6 +1010,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1133,6 +1135,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1257,6 +1260,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1381,6 +1385,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1505,6 +1510,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index 9975c394730..f3c292836a5 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -10,15 +10,14 @@ using namespace cv; namespace { -std::string getHelpStr(std::string progFname) +std::string getHelpStr(const std::string& progFname) { std::stringstream out; out << " Demo of text detection CNN for text detection." 
<< std::endl << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"< " << std::endl - << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"< textSpotter = - text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false); + text::TextDetectorCNN::create(modelArch, moddelWeights, false); std::vector bbox; std::vector outProbabillities; - textSpotter->textDetectInImage(image, bbox, outProbabillities); + textSpotter->detect(image, bbox, outProbabillities); textbox_draw(image, bbox, outProbabillities, 0.5f); diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 1c3933fda47..cd624985fcd 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -72,7 +72,7 @@ class TextDetectorCNNImpl : public TextDetectorCNN } } - void textDetectInImage(InputArray inputImage_, std::vector& Bbox, std::vector& confidence) + void detect(InputArray inputImage_, std::vector& Bbox, std::vector& confidence) { CV_Assert(inputImage_.channels() == inputChannelCount_); Mat inputImage = inputImage_.getMat().clone(); From 9195d2e6140acecb0312d0ccf04f8cbb98a22a87 Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Wed, 11 Oct 2017 14:47:52 +0300 Subject: [PATCH 28/31] text: small adjustments in samples and image preprocessing --- modules/text/samples/dictnet_demo.cpp | 9 --------- modules/text/samples/textbox_demo.cpp | 4 ++-- modules/text/src/ocr_holistic.cpp | 4 ++++ modules/text/src/text_detectorCNN.cpp | 9 +++++---- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/modules/text/samples/dictnet_demo.cpp b/modules/text/samples/dictnet_demo.cpp index 277a1c9be35..f70f2c17592 100644 --- a/modules/text/samples/dictnet_demo.cpp +++ b/modules/text/samples/dictnet_demo.cpp @@ -1,12 +1,3 @@ -/* - * dictnet_demo.cpp - * - * Demonstrates simple use of the holistic word classifier in C++ - * - * Created on: June 26, 2016 - * 
Author: Anguelos Nicolaou - */ - #include "opencv2/text.hpp" #include "opencv2/highgui.hpp" #include "opencv2/imgproc.hpp" diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index f3c292836a5..e6412f9f569 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -14,14 +14,14 @@ std::string getHelpStr(const std::string& progFname) { std::stringstream out; out << " Demo of text detection CNN for text detection." << std::endl - << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"< " << std::endl << " Caffe Model files (textbox.prototxt, TextBoxes_icdar13.caffemodel)"< #include -#include "opencv2/dnn.hpp" - using namespace cv::dnn; namespace cv @@ -75,20 +74,22 @@ class TextDetectorCNNImpl : public TextDetectorCNN void detect(InputArray inputImage_, std::vector& Bbox, std::vector& confidence) { CV_Assert(inputImage_.channels() == inputChannelCount_); - Mat inputImage = inputImage_.getMat().clone(); + Size inputSize = inputImage_.getMat().size(); Bbox.resize(0); confidence.resize(0); for(size_t i = 0; i < sizes_.size(); i++) { Size inputGeometry = sizes_[i]; + Mat inputImage = inputImage_.getMat().clone(); + resize(inputImage, inputImage, inputGeometry); net_.setInput(blobFromImage(inputImage, 1, inputGeometry, Scalar(123, 117, 104)), "data"); Mat outputNet = net_.forward(); int nbrTextBoxes = outputNet.size[2]; int nCol = outputNet.size[3]; int outputChannelCount = outputNet.size[1]; CV_Assert(outputChannelCount == 1); - getOutputs((float*)(outputNet.data), nbrTextBoxes, nCol, Bbox, confidence, inputImage.size()); + getOutputs((float*)(outputNet.data), nbrTextBoxes, nCol, Bbox, confidence, inputSize); } } }; From 7031316cb7f4700cf720ce2969020de0e399e685 Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Wed, 11 Oct 2017 14:48:35 +0300 Subject: [PATCH 29/31] text: add text recognition sample --- modules/text/samples/text_recognition_cnn.cpp 
| 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 modules/text/samples/text_recognition_cnn.cpp diff --git a/modules/text/samples/text_recognition_cnn.cpp b/modules/text/samples/text_recognition_cnn.cpp new file mode 100644 index 00000000000..f0269a7d9fc --- /dev/null +++ b/modules/text/samples/text_recognition_cnn.cpp @@ -0,0 +1,109 @@ +#include +#include +#include + +#include +#include + +using namespace cv; +using namespace std; + +namespace +{ +void printHelpStr(const string& progFname) +{ + cout << " Demo of text recognition CNN for text detection." << endl + << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"< " << endl + << " Caffe Model files (textbox.prototxt, TextBoxes_icdar13.caffemodel)"<& groups, vector& probs, float thres) +{ + for (size_t i = 0; i < groups.size(); i++) + { + if(probs[i] > thres) + { + if (src.type() == CV_8UC3) + { + rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA); + String label = format("%.2f", probs[i]); + cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n"; + putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA); + } + else + rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); + } + } +} + +} + +int main(int argc, const char * argv[]) +{ + if (argc < 2) + { + printHelpStr(argv[0]); + cout << "Insufiecient parameters. Aborting!" << endl; + exit(1); + } + + const string modelArch = "textbox.prototxt"; + const string moddelWeights = "TextBoxes_icdar13.caffemodel"; + + if (!fileExists(modelArch) || !fileExists(moddelWeights)) + { + printHelpStr(argv[0]); + cout << "Model files not found in the current directory. Aborting!" 
<< endl; + exit(1); + } + + Mat image = imread(String(argv[1]), IMREAD_COLOR); + + cout << "Starting Text Box Demo" << endl; + Ptr textSpotter = + text::TextDetectorCNN::create(modelArch, moddelWeights, false); + + vector bbox; + vector outProbabillities; + textSpotter->detect(image, bbox, outProbabillities); + + float prob_threshold = 0.6f; + Mat image_copy = image.clone(); + textbox_draw(image_copy, bbox, outProbabillities, prob_threshold); + imshow("Text detection", image_copy); + image_copy = image.clone(); + + Ptr wordSpotter = + text::OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt"); + + for(size_t i = 0; i < bbox.size(); i++) + { + if(outProbabillities[i] > prob_threshold) + { + Mat wordImg; + cvtColor(image(bbox[i]), wordImg, COLOR_BGR2GRAY); + string word; + vector confs; + wordSpotter->run(wordImg, word, NULL, NULL, &confs); + rectangle(image_copy, bbox[i], Scalar(0, 255, 255), 1, LINE_AA); + putText(image_copy, word, bbox[i].tl(), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 1, LINE_AA); + } + } + imshow("Text recognition", image_copy); + cout << "Recognition finished. 
Press any key to exit.\n"; + waitKey(); + return 0; +} + From 27961cd8ccc043ccd20ca54d89859710f14a8559 Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Wed, 11 Oct 2017 16:34:06 +0300 Subject: [PATCH 30/31] text: fix wrong channel swap in TestDetectorCNN --- modules/text/samples/text_recognition_cnn.cpp | 1 - modules/text/src/text_detectorCNN.cpp | 8 +++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/text/samples/text_recognition_cnn.cpp b/modules/text/samples/text_recognition_cnn.cpp index f0269a7d9fc..d7a95398bff 100644 --- a/modules/text/samples/text_recognition_cnn.cpp +++ b/modules/text/samples/text_recognition_cnn.cpp @@ -106,4 +106,3 @@ int main(int argc, const char * argv[]) waitKey(); return 0; } - diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 23a84f01597..e74594bac0b 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -74,22 +74,20 @@ class TextDetectorCNNImpl : public TextDetectorCNN void detect(InputArray inputImage_, std::vector& Bbox, std::vector& confidence) { CV_Assert(inputImage_.channels() == inputChannelCount_); - Size inputSize = inputImage_.getMat().size(); + Mat inputImage = inputImage_.getMat(); Bbox.resize(0); confidence.resize(0); for(size_t i = 0; i < sizes_.size(); i++) { Size inputGeometry = sizes_[i]; - Mat inputImage = inputImage_.getMat().clone(); - resize(inputImage, inputImage, inputGeometry); - net_.setInput(blobFromImage(inputImage, 1, inputGeometry, Scalar(123, 117, 104)), "data"); + net_.setInput(blobFromImage(inputImage, 1, inputGeometry, Scalar(123, 117, 104), false, false), "data"); Mat outputNet = net_.forward(); int nbrTextBoxes = outputNet.size[2]; int nCol = outputNet.size[3]; int outputChannelCount = outputNet.size[1]; CV_Assert(outputChannelCount == 1); - getOutputs((float*)(outputNet.data), nbrTextBoxes, nCol, Bbox, confidence, inputSize); + getOutputs((float*)(outputNet.data), 
nbrTextBoxes, nCol, Bbox, confidence, inputImage.size()); } } }; From fd2e37da56e945f741ee7296ef8745473a9f7b64 Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Mon, 30 Oct 2017 15:33:12 +0300 Subject: [PATCH 31/31] text: improve DL-based samples --- .../include/opencv2/text/textDetector.hpp | 10 ++- modules/text/samples/text_recognition_cnn.cpp | 66 +++++++++++-------- modules/text/samples/textbox_demo.cpp | 39 ++++++----- modules/text/src/text_detectorCNN.cpp | 24 +++---- 4 files changed, 81 insertions(+), 58 deletions(-) diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index 9c780ae31e4..fdb92fdfbd0 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -54,9 +54,15 @@ class CV_EXPORTS_W TextDetectorCNN : public TextDetector @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. - @param detectMultiscale if true, multiple scales of the input image will be used as network input + @param detectionSizes a list of sizes for multiscale detection. The values`[(300,300),(700,500),(700,300),(700,700),(1600,1600)]` are + recommended in @cite LiaoSBWL17 to achieve the best quality. */ - CV_WRAP static Ptr create(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale = false); + static Ptr create(const String& modelArchFilename, const String& modelWeightsFilename, + std::vector detectionSizes); + /** + @overload + */ + CV_WRAP static Ptr create(const String& modelArchFilename, const String& modelWeightsFilename); }; //! 
@} diff --git a/modules/text/samples/text_recognition_cnn.cpp b/modules/text/samples/text_recognition_cnn.cpp index d7a95398bff..84df57d297d 100644 --- a/modules/text/samples/text_recognition_cnn.cpp +++ b/modules/text/samples/text_recognition_cnn.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -29,22 +30,27 @@ bool fileExists (const string& filename) return f.good(); } -void textbox_draw(Mat src, vector& groups, vector& probs, float thres) +void textbox_draw(Mat src, std::vector& groups, std::vector& probs, std::vector& indexes) { - for (size_t i = 0; i < groups.size(); i++) + for (size_t i = 0; i < indexes.size(); i++) { - if(probs[i] > thres) + if (src.type() == CV_8UC3) { - if (src.type() == CV_8UC3) - { - rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA); - String label = format("%.2f", probs[i]); - cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n"; - putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA); - } - else - rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); + Rect currrentBox = groups[indexes[i]]; + rectangle(src, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA); + String label = format("%.2f", probs[indexes[i]]); + std::cout << "text box: " << currrentBox << " confidence: " << probs[indexes[i]] << "\n"; + + int baseLine = 0; + Size labelSize = getTextSize(label, FONT_HERSHEY_PLAIN, 1, 1, &baseLine); + int yLeftBottom = std::max(currrentBox.y, labelSize.height); + rectangle(src, Point(currrentBox.x, yLeftBottom - labelSize.height), + Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED); + + putText(src, label, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA); } + else + rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); } } @@ -73,33 +79,41 @@ int main(int argc, const char * argv[]) cout << "Starting Text Box Demo" << endl; Ptr textSpotter = - 
text::TextDetectorCNN::create(modelArch, moddelWeights, false); + text::TextDetectorCNN::create(modelArch, moddelWeights); vector bbox; vector outProbabillities; textSpotter->detect(image, bbox, outProbabillities); + std::vector indexes; + cv::dnn::NMSBoxes(bbox, outProbabillities, 0.4f, 0.5f, indexes); - float prob_threshold = 0.6f; Mat image_copy = image.clone(); - textbox_draw(image_copy, bbox, outProbabillities, prob_threshold); + textbox_draw(image_copy, bbox, outProbabillities, indexes); imshow("Text detection", image_copy); image_copy = image.clone(); Ptr wordSpotter = text::OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt"); - for(size_t i = 0; i < bbox.size(); i++) + for(size_t i = 0; i < indexes.size(); i++) { - if(outProbabillities[i] > prob_threshold) - { - Mat wordImg; - cvtColor(image(bbox[i]), wordImg, COLOR_BGR2GRAY); - string word; - vector confs; - wordSpotter->run(wordImg, word, NULL, NULL, &confs); - rectangle(image_copy, bbox[i], Scalar(0, 255, 255), 1, LINE_AA); - putText(image_copy, word, bbox[i].tl(), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 1, LINE_AA); - } + Mat wordImg; + cvtColor(image(bbox[indexes[i]]), wordImg, COLOR_BGR2GRAY); + string word; + vector confs; + wordSpotter->run(wordImg, word, NULL, NULL, &confs); + + Rect currrentBox = bbox[indexes[i]]; + rectangle(image_copy, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA); + + int baseLine = 0; + Size labelSize = getTextSize(word, FONT_HERSHEY_PLAIN, 1, 1, &baseLine); + int yLeftBottom = std::max(currrentBox.y, labelSize.height); + rectangle(image_copy, Point(currrentBox.x, yLeftBottom - labelSize.height), + Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED); + + putText(image_copy, word, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA); + } imshow("Text recognition", image_copy); cout << "Recognition finished. 
Press any key to exit.\n"; diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index e6412f9f569..1cf9a9aabf4 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -27,22 +28,27 @@ bool fileExists (const std::string& filename) return f.good(); } -void textbox_draw(Mat src, std::vector& groups, std::vector& probs, float thres) +void textbox_draw(Mat src, std::vector& groups, std::vector& probs, std::vector& indexes) { - for (size_t i = 0; i < groups.size(); i++) + for (size_t i = 0; i < indexes.size(); i++) { - if(probs[i] > thres) + if (src.type() == CV_8UC3) { - if (src.type() == CV_8UC3) - { - rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA); - String label = format("%.2f", probs[i]); - std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n"; - putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA); - } - else - rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); + Rect currrentBox = groups[indexes[i]]; + rectangle(src, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA); + String label = format("%.2f", probs[indexes[i]]); + std::cout << "text box: " << currrentBox << " confidence: " << probs[indexes[i]] << "\n"; + + int baseLine = 0; + Size labelSize = getTextSize(label, FONT_HERSHEY_PLAIN, 1, 1, &baseLine); + int yLeftBottom = std::max(currrentBox.y, labelSize.height); + rectangle(src, Point(currrentBox.x, yLeftBottom - labelSize.height), + Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED); + + putText(src, label, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA); } + else + rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); } } @@ -62,7 +68,7 @@ int main(int argc, const char * argv[]) if (!fileExists(modelArch) || !fileExists(moddelWeights)) { - std::cout< 
textSpotter = - text::TextDetectorCNN::create(modelArch, moddelWeights, false); + text::TextDetectorCNN::create(modelArch, moddelWeights); std::vector bbox; std::vector outProbabillities; textSpotter->detect(image, bbox, outProbabillities); - textbox_draw(image, bbox, outProbabillities, 0.5f); + std::vector indexes; + cv::dnn::NMSBoxes(bbox, outProbabillities, 0.3f, 0.4f, indexes); + + textbox_draw(image, bbox, outProbabillities, indexes); imshow("TextBox Demo",image); std::cout << "Done!" << std::endl << std::endl; diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index e74594bac0b..84f769b4251 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -23,8 +23,6 @@ class TextDetectorCNNImpl : public TextDetectorCNN Net net_; std::vector sizes_; int inputChannelCount_; - bool detectMultiscale_; - void getOutputs(const float* buffer,int nbrTextBoxes,int nCol, std::vector& Bbox, std::vector& confidence, Size inputShape) @@ -54,21 +52,12 @@ class TextDetectorCNNImpl : public TextDetectorCNN } public: - TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale) : - detectMultiscale_(detectMultiscale) + TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, std::vector detectionSizes) : + sizes_(detectionSizes) { net_ = readNetFromCaffe(modelArchFilename, modelWeightsFilename); CV_Assert(!net_.empty()); inputChannelCount_ = 3; - sizes_.push_back(Size(700, 700)); - - if(detectMultiscale_) - { - sizes_.push_back(Size(300, 300)); - sizes_.push_back(Size(700,500)); - sizes_.push_back(Size(700,300)); - sizes_.push_back(Size(1600,1600)); - } } void detect(InputArray inputImage_, std::vector& Bbox, std::vector& confidence) @@ -92,9 +81,14 @@ class TextDetectorCNNImpl : public TextDetectorCNN } }; -Ptr TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, bool 
detectMultiscale) +Ptr TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, std::vector detectionSizes) +{ + return makePtr(modelArchFilename, modelWeightsFilename, detectionSizes); +} + +Ptr TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename) { - return makePtr(modelArchFilename, modelWeightsFilename, detectMultiscale); + return create(modelArchFilename, modelWeightsFilename, std::vector(1, Size(300, 300))); } } //namespace text } //namespace cv