Skip to content

Gsoc text detect merge #1287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 68 additions & 19 deletions modules/text/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,24 +1,73 @@
set(the_description "Text Detection and Recognition")
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python java)

if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
find_package(Tesseract QUIET)
if(Tesseract_FOUND)
message(STATUS "Tesseract: YES")
set(HAVE_TESSERACT 1)
ocv_include_directories(${Tesseract_INCLUDE_DIR})
ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
else()
message(STATUS "Tesseract: NO")
endif()

# When the running CMake knows policy CMP0023, pin it to OLD and report that
# in the configure log. CMP0023 governs mixing the plain and keyword
# signatures of target_link_libraries() on a single target.
if(POLICY CMP0023)
  message(STATUS "Explicitly setting policy CMP0023 to OLD")
  cmake_policy(SET CMP0023 OLD)
endif()
# Make the Find*.cmake scripts that live next to this file visible to
# find_package() by adding this directory to the module search path.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})

# Candidate list of OpenCV modules the text module depends on.
# NOTE(review): in the visible source TEXT_DEPS is only referenced from a
# commented-out ocv_define_module() call - confirm it is still needed.
set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)

# Probe for Caffe and report the outcome in the configure log.
# HAVE_CAFFE is set to 1 only when the package was found; nothing is added to
# TEXT_DEPS when it is missing.
find_package(Caffe)
if(NOT Caffe_FOUND)
  message(STATUS "Caffe: NO")
else()
  message(STATUS "Caffe: YES")
  set(HAVE_CAFFE 1)
endif()

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)
#internal dependencies
# Probe for Protobuf and report the outcome in the configure log.
# HAVE_PROTOBUF is set to 1 only when the package was found.
find_package(Protobuf)
if(NOT Protobuf_FOUND)
  message(STATUS "Protobuf: NO")
else()
  message(STATUS "Protobuf: YES")
  set(HAVE_PROTOBUF 1)
endif()

# Probe for glog and report the outcome in the configure log.
# HAVE_GLOG is set to 1 only when the package was found.
find_package(Glog)
if(NOT Glog_FOUND)
  message(STATUS "Glog: NO")
else()
  message(STATUS "Glog: YES")
  set(HAVE_GLOG 1)
endif()

ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d WRAP python)
#ocv_define_module(text ${TEXT_DEPS} WRAP python)

ocv_add_testdata(samples/ contrib/text
FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
)
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})

# Optional Tesseract OCR support.
# When the package is found, its include directory is added, its libraries are
# linked into opencv_text and HAVE_TESSERACT is defined for the compiler.
find_package(Tesseract)
# Test the variable by name: if() dereferences it itself. Writing
# if(${Tesseract_FOUND}) expands the value first, which degenerates to an
# empty if() when the variable is unset and re-dereferences the value if it
# happens to name another variable.
if(Tesseract_FOUND)
  message(STATUS "Tesseract: YES")
  include_directories(${Tesseract_INCLUDE_DIR})
  target_link_libraries(opencv_text ${Tesseract_LIBS})
  add_definitions(-DHAVE_TESSERACT)
else()
  message(STATUS "Tesseract: NO")
endif()


# Wire the Caffe backend into opencv_text.
# Only done when HAVE_CAFFE, HAVE_GLOG and HAVE_PROTOBUF are all set.
if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF)
  include_directories(${Caffe_INCLUDE_DIR})

  # HDF5 (with its high-level component) and Boost are requested as REQUIRED,
  # so configuration stops if either is missing once this branch is entered.
  find_package(HDF5 COMPONENTS HL REQUIRED)
  include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
  list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})

  find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
  include_directories(SYSTEM ${Boost_INCLUDE_DIR})

  # NOTE(review): hard-coded CUDA 8.0 / 7.5 and OpenBLAS locations; these
  # presumably match one specific Linux installation and should eventually
  # come from a find_package() result - confirm.
  # Every entry is an absolute path: a missing leading "/" would make CMake
  # resolve the entry relative to the current source directory.
  include_directories(SYSTEM
    /usr/local/cuda-8.0/targets/x86_64-linux/include/
    /usr/local/cuda-8.0/include/
    /usr/local/cuda-7.5/targets/x86_64-linux/include/
  )
  # link_directories() has no SYSTEM keyword; passing it would add a directory
  # literally named "SYSTEM" to the linker search path.
  link_directories(
    /usr/local/cuda-8.0/targets/x86_64-linux/lib/
    /usr/local/cuda-8.0/lib/
    /usr/local/cuda-7.5/targets/x86_64-linux/lib/
    /usr/lib/openblas-base/lib
    /usr/local/cuda-8.0/lib64
  )

  list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
  target_link_libraries(opencv_text
    atlas
    blas
    ${Caffe_LIBS}
    ${Glog_LIBS}
    ${Protobuf_LIBS}
    ${HDF5_LIBRARIES}
    ${Boost_LIBRARIES}
  )
  add_definitions(-DHAVE_CAFFE)
endif() #HAVE_CAFFE

# Diagnostic output about a possible Caffe conflict.
# NOTE(review): the if() below has no condition. An empty condition evaluates
# to false, so the else() branch ("TEXT CAFFE CONFLICT") is always the one
# printed. Presumably a real check was intended here - confirm and fill it in,
# or remove the block.
message(STATUS "TEXT CAFFE SEARCH")
if()
message(STATUS "TEXT NO CAFFE CONFLICT")
else()
message(STATUS "TEXT CAFFE CONFLICT")
endif()
14 changes: 14 additions & 0 deletions modules/text/FindCaffe.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Find module for Caffe (used for CNN Triplet training).
#
# Variables produced:
#   Caffe_INCLUDE_DIR - directory under which one of the caffe/ headers listed
#                       below was located
#   Caffe_LIBS        - location of the "caffe" library
#   Caffe_FOUND       - set to 1 only when both of the above were located
unset(Caffe_FOUND)

# The NAMES entries are alternatives: locating any one of them is enough.
find_path(Caffe_INCLUDE_DIR
  NAMES
    caffe/caffe.hpp
    caffe/common.hpp
    caffe/net.hpp
    caffe/proto/caffe.pb.h
    caffe/util/io.hpp
    caffe/vision_layers.hpp
  HINTS
    /usr/local/include
)

find_library(Caffe_LIBS
  NAMES caffe
  HINTS
    /usr/local/lib
)

if(Caffe_INCLUDE_DIR AND Caffe_LIBS)
  set(Caffe_FOUND 1)
endif()
10 changes: 10 additions & 0 deletions modules/text/FindGlog.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Find module for glog, which is required for Caffe.
#
# Variables produced:
#   Glog_LIBS  - location of the "glog" library
#   Glog_FOUND - set to 1 only when the library was located
unset(Glog_FOUND)

find_library(Glog_LIBS
  NAMES glog
  HINTS
    /usr/local/lib
)

if(Glog_LIBS)
  set(Glog_FOUND 1)
endif()
10 changes: 10 additions & 0 deletions modules/text/FindProtobuf.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Find module for Protobuf, which is required for Caffe.
#
# Variables produced:
#   Protobuf_LIBS  - location of the "protobuf" library
#   Protobuf_FOUND - set to 1 only when the library was located
unset(Protobuf_FOUND)

find_library(Protobuf_LIBS
  NAMES protobuf
  HINTS
    /usr/local/lib
)

if(Protobuf_LIBS)
  set(Protobuf_FOUND 1)
endif()
22 changes: 22 additions & 0 deletions modules/text/FindTesseract.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Find module for Tesseract OCR and the lept library it is linked with.
#
# Variables produced:
#   Tesseract_INCLUDE_DIR - directory containing tesseract/baseapi.h
#   Tesseract_LIBRARY     - location of the "tesseract" library
#   Lept_LIBRARY          - location of the "lept" library
#   Tesseract_LIBS        - both libraries, in link order; empty when not found
#   Tesseract_FOUND       - set to 1 only when the header and both libraries
#                           were located
unset(Tesseract_FOUND)

find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
  HINTS
    /usr/include
    /usr/local/include
)

find_library(Tesseract_LIBRARY
  NAMES tesseract
  HINTS
    /usr/lib
    /usr/local/lib
)

find_library(Lept_LIBRARY
  NAMES lept
  HINTS
    /usr/lib
    /usr/local/lib
)

# Check each search result on its own. Testing the combined list is wrong:
# if() only treats a value as false when the whole string ends in "-NOTFOUND",
# so "Tesseract_LIBRARY-NOTFOUND;/usr/lib/liblept.so" would count as true and
# the package would be reported as found without the tesseract library.
if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
  set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})
  set(Tesseract_FOUND 1)
else()
  # Never hand "-NOTFOUND" placeholders to callers as link libraries.
  set(Tesseract_LIBS "")
endif()
142 changes: 142 additions & 0 deletions modules/text/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,145 @@ Notes
2. The Tesseract configure script may fail to detect leptonica, in which case you may have to edit the configure script: comment out the `if` checks around this message and keep only the "then" branch.

3. You are encouraged to search the Net for some better pre-trained classifiers, as well as classifiers for other languages.


Word spotting CNN
=================

Intro
-----

A word spotting CNN is a CNN that takes an image assumed to contain a single word and outputs a probability distribution over a given vocabulary.
Other backends may be supported in the future; for the moment, only the Caffe backend is supported.




Installation of Caffe backend
----------------------------
The Caffe wrapping backend has the same requirements as Caffe itself.
* Caffe can be built against OpenCV; if the Caffe backend is enabled, a circular dependency arises.
The simplest solution is to build Caffe without OpenCV support.
* Only the operating systems supported by Caffe are supported by the backend.
The scripts describing the module have been developed on Ubuntu 16.04 and assume such a system.
They should be easy to adapt to other UNIX systems, including OSX.

Sample script for building Caffe

```bash
#!/bin/bash
SRCROOT="${HOME}/caffe_inst/"
mkdir -p "$SRCROOT"
cd "$SRCROOT"
git clone https://github.com/BVLC/caffe.git
cd caffe
git checkout 91b09280f5233cafc62954c98ce8bc4c204e7475
git branch 91b09280f5233cafc62954c98ce8bc4c204e7475
cat Makefile.config.example > Makefile.config
echo 'USE_OPENCV := 0' >> Makefile.config
echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config


echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200
+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200
@@ -234,6 +234,7 @@

template <typename T>
friend class Net;
+ virtual ~Callback(){}
};
const vector<Callback*>& before_forward() const { return before_forward_; }
void add_before_forward(Callback* value) {
">/tmp/cleanup_caffe.diff

patch < /tmp/cleanup_caffe.diff


make -j 6

make pycaffe

make distribute
```


```bash
#!/bin/bash
cd $OPENCV_BUILD_DIR #You must set this
CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04

cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./


```

Text Detection CNN
=================

Intro
-----

A text detection CNN is a CNN that takes an image which may contain multiple words. It outputs a list of Rects (bounding boxes), each with the probability that it contains text.
Other backends may be supported in the future; for the moment, only the Caffe backend is supported.




Installation of Caffe backend
----------------------------
* Please note that a custom Caffe based on the SSD branch is required; the link to the custom Caffe is provided below.
The Caffe wrapping backend has the same requirements as Caffe itself.
* Caffe can be built against OpenCV; if the Caffe backend is enabled, a circular dependency arises.
The simplest solution is to build Caffe without OpenCV support.
* Only the operating systems supported by Caffe are supported by the backend.
The scripts describing the module have been developed on Ubuntu 16.04 and assume such a system.
They should be easy to adapt to other UNIX systems, including OSX.

Sample script for building Caffe

```bash
#!/bin/bash
SRCROOT="${HOME}/caffe_inst/"
mkdir -p "$SRCROOT"
cd "$SRCROOT"
git clone https://github.com/sghoshcvc/TextBoxes.git
cd TextBoxes
cat Makefile.config.example > Makefile.config
echo 'USE_OPENCV := 0' >> Makefile.config
echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config


echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200
+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200
@@ -234,6 +234,7 @@

template <typename T>
friend class Net;
+ virtual ~Callback(){}
};
const vector<Callback*>& before_forward() const { return before_forward_; }
void add_before_forward(Callback* value) {
">/tmp/cleanup_caffe.diff

patch < /tmp/cleanup_caffe.diff


make -j 6

make pycaffe

make distribute
```


```bash
#!/bin/bash
cd $OPENCV_BUILD_DIR #You must set this
CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04

cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./


```
3 changes: 2 additions & 1 deletion modules/text/include/opencv2/text.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ the use of this software, even if advised of the possibility of such damage.

#include "opencv2/text/erfilter.hpp"
#include "opencv2/text/ocr.hpp"
#include "opencv2/text/textDetector.hpp"

/** @defgroup text Scene Text Detection and Recognition

Expand Down Expand Up @@ -92,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping).

To see the text detector at work, have a look at the textdetection demo:
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>

@defgroup text_recognize Scene Text Recognition
@}
Expand Down
1 change: 1 addition & 0 deletions modules/text/include/opencv2/text/erfilter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ component tree of the image. :
*/
struct CV_EXPORTS ERStat
{

public:
//! Constructor
explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
Expand Down
Loading