Skip to content

Gsoc-2017 Text detect and recognition dnn backend #1348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
9ae765a
Text detector class and Custom Image processor Class
sghoshcvc Jun 22, 2017
40db962
Add sample script
sghoshcvc Jun 22, 2017
fc9c41b
Minor modification
sghoshcvc Jun 23, 2017
e494efb
Added comments
sghoshcvc Jun 23, 2017
2b8ed12
added instructions to build
sghoshcvc Jul 5, 2017
be395e5
Modified the class heirarchy
sghoshcvc Jul 19, 2017
1bc908b
Added python sample script
sghoshcvc Jul 19, 2017
73ddeab
simple cleaning and added comments
sghoshcvc Jul 19, 2017
9071ca7
Merge branch 'master' into gsoc_textDetect_2017
sghoshcvc Jul 21, 2017
8cf800e
fix a dependency bug
sghoshcvc Jul 21, 2017
a617059
removed Java Wrapper
sghoshcvc Jul 21, 2017
ca2a2ab
Removed white space errors and platform specific warnings
sghoshcvc Jul 21, 2017
b913cac
Fixed Doxygen Warning and error
sghoshcvc Jul 22, 2017
4c9af58
Fixed Text box demo error
sghoshcvc Jul 22, 2017
103fbaf
White Space error in sample python script
sghoshcvc Jul 23, 2017
0e74d63
Modified to handle windows warning
sghoshcvc Jul 23, 2017
111b3be
Modified to silent Clang warnings
sghoshcvc Jul 24, 2017
a2cab07
DNN backend initial commit
sghoshcvc Aug 22, 2017
c697e41
added calculation of output size
sghoshcvc Aug 28, 2017
731637e
Merge branch 'master' into GSOC_text_detect_DNN_backend
sghoshcvc Aug 28, 2017
dc48968
removed blanks, fixed Cmake issue
sghoshcvc Sep 5, 2017
e98f42e
Merge branch 'GSOC_text_detect_DNN_backend' of https://github.com/sgh…
sghoshcvc Sep 5, 2017
af536b1
seperate image pre-processing from ocr code
sghoshcvc Sep 5, 2017
efc864c
removed hard coding height and width
sghoshcvc Sep 15, 2017
887e6e5
removed hard codinginput parameters
sghoshcvc Sep 17, 2017
878258b
modified initializers
sghoshcvc Sep 17, 2017
bf630be
Modified initializers list
sghoshcvc Sep 18, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 79 additions & 19 deletions modules/text/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,24 +1,84 @@
set(the_description "Text Detection and Recognition")
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python java)

if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
find_package(Tesseract QUIET)
if(Tesseract_FOUND)
message(STATUS "Tesseract: YES")
set(HAVE_TESSERACT 1)
ocv_include_directories(${Tesseract_INCLUDE_DIR})
ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
else()
message(STATUS "Tesseract: NO")
endif()

# CMP0023 forbids mixing the plain and keyword signatures of
# target_link_libraries() on one target. When the running CMake knows the
# policy, pin it to OLD so both signatures stay usable in this directory.
if(POLICY CMP0023)
  message(STATUS "Explicitly setting policy CMP0023 to OLD")
  cmake_policy(SET CMP0023 OLD)
endif()

# Let find_package() pick up the Find*.cmake scripts stored in this directory.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})

set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)
Copy link
Contributor

@sovrasov sovrasov Sep 28, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This variable is unused. Also all the commented code should be removed form the cmake script.


# Probe the optional packages in the order Caffe, Protobuf, Glog.
# For each package a "<Name>: YES" or "<Name>: NO" status line is printed, and
# HAVE_<NAME> (upper-cased package name) is set to 1 when it was found.
foreach(text_pkg Caffe Protobuf Glog)
  find_package(${text_pkg})
  if(${text_pkg}_FOUND)
    message(STATUS "${text_pkg}: YES")
    string(TOUPPER "${text_pkg}" text_pkg_upper)
    set(HAVE_${text_pkg_upper} 1)
  else()
    message(STATUS "${text_pkg}: NO")
  endif()
endforeach()
# Do not leave the loop's scratch variable behind in the directory scope.
unset(text_pkg_upper)

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python)
#ocv_define_module(text ${TEXT_DEPS} WRAP python)

#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})

# Optional Tesseract OCR support.
# When the find module reports success, add the Tesseract headers to the
# include path, link the module against Tesseract (and Leptonica, which the
# find module bundles into Tesseract_LIBS) and define HAVE_TESSERACT.
find_package(Tesseract)
# Test the variable by name: if(${Tesseract_FOUND}) expands to an empty if()
# whenever the find module leaves the variable unset.
if(Tesseract_FOUND)
  message(STATUS "Tesseract: YES")
  # Use the OpenCV wrappers and ${the_module}, as the rest of this file does,
  # instead of hard-coding the opencv_text target name.
  ocv_include_directories(${Tesseract_INCLUDE_DIR})
  ocv_target_link_libraries(${the_module} ${Tesseract_LIBS})
  add_definitions(-DHAVE_TESSERACT)
else()
  message(STATUS "Tesseract: NO")
endif()


# Caffe backend wiring.
# Only enabled when Caffe and both of the libraries probed alongside it (Glog
# and Protobuf) were found. HDF5 and Boost become hard requirements in that
# case, so the REQUIRED lookups below abort configuration when they are missing.
if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF)
  include_directories(${Caffe_INCLUDE_DIR})

  find_package(HDF5 COMPONENTS HL REQUIRED)
  include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
  list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})

  find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
  include_directories(SYSTEM ${Boost_INCLUDE_DIR})
  list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})

  include_directories(SYSTEM ${CUDA_INCLUDE_DIR})
  # link_directories() has no SYSTEM keyword; passing it would add a directory
  # literally named "SYSTEM" to the linker search path.
  link_directories(${CUDA_LIBS})

  # NOTE(review): atlas and blas are linked by bare name, so they must be
  # resolvable by the linker on the build host - confirm this is intended.
  target_link_libraries(${the_module} atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
  add_definitions(-DHAVE_CAFFE)
endif()

ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
# Diagnostic output about a Caffe conflict.
# NOTE(review): the condition below is empty, so the first branch looks
# unreachable and "TEXT CAFFE CONFLICT" is presumably always printed - confirm
# which condition was intended here.
message(STATUS "TEXT CAFFE SEARCH")
if()
  message(STATUS "TEXT NO CAFFE CONFLICT")
else()
  message(STATUS "TEXT CAFFE CONFLICT")
endif()

ocv_add_testdata(samples/ contrib/text
FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
)
if(HAVE_opencv_dnn)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If dnn is enabled HAVE_OPENCV_DNN is defined in opencv_modules.hpp, so this definition is useless.

message(STATUS "dnn module found")
add_definitions(-DHAVE_DNN)
set(HAVE_DNN 1)
else()
message(STATUS "dnn module not found")
endif()
14 changes: 14 additions & 0 deletions modules/text/FindCaffe.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Find module for Caffe.
#
# Result variables:
#   Caffe_INCLUDE_DIR - directory under which one of the Caffe headers listed
#                       below was found
#   Caffe_LIBS        - path to the caffe library
#   Caffe_FOUND       - cleared first, then set to 1 only when both of the
#                       lookups above succeeded
unset(Caffe_FOUND)

# /usr/local/include is passed as a hint in addition to the default locations.
find_path(Caffe_INCLUDE_DIR
  NAMES
    caffe/caffe.hpp
    caffe/common.hpp
    caffe/net.hpp
    caffe/proto/caffe.pb.h
    caffe/util/io.hpp
    caffe/vision_layers.hpp
  HINTS /usr/local/include
)

find_library(Caffe_LIBS
  NAMES caffe
  HINTS /usr/local/lib
)

if(Caffe_INCLUDE_DIR AND Caffe_LIBS)
  set(Caffe_FOUND 1)
endif()
10 changes: 10 additions & 0 deletions modules/text/FindGlog.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Find module for glog, which is required for Caffe.
#
# Result variables:
#   Glog_LIBS  - path to the glog library
#   Glog_FOUND - cleared first, then set to 1 only when Glog_LIBS was located
unset(Glog_FOUND)

find_library(Glog_LIBS
  NAMES glog
  HINTS /usr/local/lib
)

if(Glog_LIBS)
  set(Glog_FOUND 1)
endif()
10 changes: 10 additions & 0 deletions modules/text/FindProtobuf.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Find module for the protobuf library, which is required for Caffe.
#
# Result variables:
#   Protobuf_LIBS  - path to the protobuf library
#   Protobuf_FOUND - cleared first, then set to 1 only when Protobuf_LIBS was
#                    located
#
# NOTE(review): this file has the same name as CMake's bundled FindProtobuf
# module, so it presumably takes precedence over it wherever this directory is
# on CMAKE_MODULE_PATH - confirm that no other code relies on the variables or
# targets of the bundled module.
unset(Protobuf_FOUND)

find_library(Protobuf_LIBS
  NAMES protobuf
  HINTS /usr/local/lib
)

if(Protobuf_LIBS)
  set(Protobuf_FOUND 1)
endif()
22 changes: 22 additions & 0 deletions modules/text/FindTesseract.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Find module for Tesseract OCR and the Leptonica library it is linked with.
#
# Result variables:
#   Tesseract_INCLUDE_DIR - directory containing tesseract/baseapi.h
#   Tesseract_LIBRARY     - path to the tesseract library
#   Lept_LIBRARY          - path to the lept (Leptonica) library
#   Tesseract_LIBS        - both libraries, in link order
#   Tesseract_FOUND       - cleared first, then set to 1 only when the header
#                           and both libraries were located
unset(Tesseract_FOUND)

find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
  HINTS
    /usr/include
    /usr/local/include)

find_library(Tesseract_LIBRARY NAMES tesseract
  HINTS
    /usr/lib
    /usr/local/lib)

find_library(Lept_LIBRARY NAMES lept
  HINTS
    /usr/lib
    /usr/local/lib)

set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})

# Check every lookup on its own. Testing the combined Tesseract_LIBS list is
# not enough: if() only treats a value as false when the whole string ends in
# -NOTFOUND, so "Tesseract_LIBRARY-NOTFOUND;<path to lept>" would count as
# found even though the tesseract library itself is missing.
if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
  set(Tesseract_FOUND 1)
endif()
80 changes: 80 additions & 0 deletions modules/text/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,83 @@ Notes
2. Tesseract configure script may fail to detect leptonica, so you may have to edit the configure script - comment off some if's around this message and retain only "then" branch.

3. You are encouraged to search the Net for some better pre-trained classifiers, as well as classifiers for other languages.


Text Detection CNN
=================

Intro
-----

The text module now has text detection and recognition based on deep CNNs. The text detector is a deep CNN that takes an image which may contain multiple words and outputs a list of Rects (bounding boxes), each with the probability that it contains text. The text recognizer provides a probability over a given vocabulary for each of these Rects.

Two backends are supported 1) caffe 2) opencv-dnn




Installation of Caffe backend
-----------------------------
* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below
The caffe wrapping backend has the requirements caffe does.
* Caffe can be built against OpenCV; if the caffe backend is enabled, a circular dependency arises.
The simplest solution is to build caffe without support for OpenCV.
* Only the OS supported by Caffe are supported by the backend.
The scripts describing the module have been developed in ubuntu 16.04 and assume such a system.
Other UNIX systems including OSX should be easy to adapt.

Sample script for building Caffe

```bash
#!/bin/bash
SRCROOT="${HOME}/caffe_inst/"
mkdir -p "$SRCROOT"
cd "$SRCROOT"
git clone https://github.com/sghoshcvc/TextBoxes.git
cd TextBoxes
cat Makefile.config.example > Makefile.config
echo 'USE_OPENCV := 0' >> Makefile.config
echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config


echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200
+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200
@@ -234,6 +234,7 @@

template <typename T>
friend class Net;
+ virtual ~Callback(){}
};
const vector<Callback*>& before_forward() const { return before_forward_; }
void add_before_forward(Callback* value) {
">/tmp/cleanup_caffe.diff

patch < /tmp/cleanup_caffe.diff


make -j 6

make pycaffe

make distribute
```


```bash
#!/bin/bash
cd $OPENCV_BUILD_DIR #You must set this
CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04

cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="$OPENCV_CONTRIB/modules" ./


```
where $OPENCV_CONTRIB is the root directory containing the opencv_contrib modules

Installation of opencv-dnn backend
----------------------------------

Use of opencv-dnn does not need any additional library.

The recent opencv-3.3.0 needs to be built with the extra modules in order to use the text module.
3 changes: 2 additions & 1 deletion modules/text/include/opencv2/text.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ the use of this software, even if advised of the possibility of such damage.

#include "opencv2/text/erfilter.hpp"
#include "opencv2/text/ocr.hpp"
#include "opencv2/text/textDetector.hpp"

/** @defgroup text Scene Text Detection and Recognition

Expand Down Expand Up @@ -92,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping).

To see the text detector at work, have a look at the textdetection demo:
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this link may be invalid.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The link is valid

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it's better to remove all the occurrences of itseez. It doesn't exist anymore and all such links are redirected to opencv repo.

@defgroup text_recognize Scene Text Recognition
@}
Expand Down
1 change: 1 addition & 0 deletions modules/text/include/opencv2/text/erfilter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ component tree of the image. :
*/
struct CV_EXPORTS ERStat
{

public:
//! Constructor
explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
Expand Down
Loading