-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Gsoc-2017 Text detect and recognition dnn backend #1348
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
9ae765a
40db962
fc9c41b
e494efb
2b8ed12
be395e5
1bc908b
73ddeab
9071ca7
8cf800e
a617059
ca2a2ab
b913cac
4c9af58
103fbaf
0e74d63
111b3be
a2cab07
c697e41
731637e
dc48968
e98f42e
af536b1
efc864c
887e6e5
878258b
bf630be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,77 @@ | ||
set(the_description "Text Detection and Recognition") | ||
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python java) | ||
|
||
if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT) | ||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) | ||
find_package(Tesseract QUIET) | ||
if(Tesseract_FOUND) | ||
message(STATUS "Tesseract: YES") | ||
set(HAVE_TESSERACT 1) | ||
ocv_include_directories(${Tesseract_INCLUDE_DIR}) | ||
ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES}) | ||
else() | ||
message(STATUS "Tesseract: NO") | ||
endif() | ||
|
||
# Using cmake scripts and modules | ||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) | ||
|
||
set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d) | ||
|
||
# Probe the optional Caffe backend and its internal dependencies
# (Protobuf and Glog), in that order. For each package that is located a
# "<name>: YES" status line is printed and HAVE_<NAME> is set to 1;
# otherwise "<name>: NO" is printed and HAVE_<NAME> is left untouched.
foreach(text_dep Caffe Protobuf Glog)
  find_package(${text_dep})
  if(NOT ${text_dep}_FOUND)
    message(STATUS "${text_dep}: NO")
  else()
    message(STATUS "${text_dep}: YES")
    # HAVE_CAFFE / HAVE_PROTOBUF / HAVE_GLOG
    string(TOUPPER "${text_dep}" text_dep_upper)
    set(HAVE_${text_dep_upper} 1)
  endif()
endforeach()
unset(text_dep_upper)
|
||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in | ||
${CMAKE_BINARY_DIR}/text_config.hpp @ONLY) | ||
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python) | ||
#ocv_define_module(text ${TEXT_DEPS} WRAP python) | ||
|
||
ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR}) | ||
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) | ||
|
||
ocv_add_testdata(samples/ contrib/text | ||
FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg" | ||
) | ||
# Optional Tesseract OCR support: when the package is located, add its
# include directory, link its libraries into opencv_text and define
# HAVE_TESSERACT for the sources.
find_package(Tesseract)
# Test the variable by name. if(${Tesseract_FOUND}) expands the value first,
# which degenerates to a bare if() when the variable is unset and would
# re-dereference the value if it happened to name another variable.
if(Tesseract_FOUND)
  message(STATUS "Tesseract: YES")
  include_directories(${Tesseract_INCLUDE_DIR})
  target_link_libraries(opencv_text ${Tesseract_LIBS})
  add_definitions(-DHAVE_TESSERACT)
else()
  message(STATUS "Tesseract: NO")
endif()
|
||
|
||
if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF) | ||
include_directories(${Caffe_INCLUDE_DIR}) | ||
find_package(HDF5 COMPONENTS HL REQUIRED) | ||
include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) | ||
list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) | ||
find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) | ||
include_directories(SYSTEM ${Boost_INCLUDE_DIR}) | ||
# CUDA headers/libraries needed by the Caffe backend.
# Prefer the toolkit located by CMake's FindCUDA so the build does not depend
# on one specific CUDA release; the hard-coded locations are kept only as a
# fallback for configurations where CUDA was not detected.
if(CUDA_FOUND)
  include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
  link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib)
else()
  # All fallback paths are absolute (the original listed "usr/local/..."
  # without the leading slash, which CMake treats as a relative path).
  include_directories(SYSTEM
    /usr/local/cuda-8.0/targets/x86_64-linux/include/
    /usr/local/cuda-8.0/include/
    /usr/local/cuda-7.5/targets/x86_64-linux/include/)
  # link_directories() has no SYSTEM keyword; passing it adds a directory
  # literally named "SYSTEM" to the linker search path.
  link_directories(
    /usr/local/cuda-8.0/targets/x86_64-linux/lib/
    /usr/local/cuda-8.0/lib/
    /usr/local/cuda-7.5/targets/x86_64-linux/lib/
    /usr/local/cuda-8.0/lib64)
endif()
# Location of the BLAS libraries linked below (atlas/blas).
link_directories(/usr/lib/openblas-base/lib)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This hard-coding is something to fix. If possible we should use the cuda detected by CMake rather than hard coding. The question to answer is what happens when cuda 9.0 comes out. Would this be broken? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I'll look into it, we should certainly use CUDA detected by opencv, Though this was part of Anguelos's code so we should also ask his opinion. |
||
list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) | ||
target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES}) | ||
add_definitions(-DHAVE_CAFFE) | ||
endif() #HAVE_CAFFE | ||
|
||
# Reports whether a Caffe conflict was detected.
# NOTE(review): the if() below has no condition. CMake evaluates an empty
# if() as false, so the else() branch ("TEXT CAFFE CONFLICT") is the one that
# is always printed. The intended condition is not visible here - TODO
# confirm what should be tested and restore it.
message(STATUS "TEXT CAFFE SEARCH") | ||
if() | ||
message(STATUS "TEXT NO CAFFE CONFLICT") | ||
else() | ||
message(STATUS "TEXT CAFFE CONFLICT") | ||
endif() | ||
|
||
if(HAVE_opencv_dnn) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If dnn is enabled |
||
message(STATUS "dnn module found") | ||
add_definitions(-DHAVE_DNN) | ||
set(HAVE_DNN 1) | ||
else() | ||
message(STATUS "dnn module not found") | ||
endif() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Find module for the Caffe package (used for CNN Triplet training).
#
# Looks for the `caffe` library and the Caffe headers, hinting at the
# /usr/local prefix. Variables produced:
#   Caffe_LIBS        - path of the caffe library
#   Caffe_INCLUDE_DIR - directory in which one of the listed headers was found
#   Caffe_FOUND       - set to 1 only when both of the above were located
unset(Caffe_FOUND)

find_library(Caffe_LIBS
  NAMES caffe
  HINTS /usr/local/lib
)

find_path(Caffe_INCLUDE_DIR
  NAMES
    caffe/caffe.hpp
    caffe/common.hpp
    caffe/net.hpp
    caffe/proto/caffe.pb.h
    caffe/util/io.hpp
    caffe/vision_layers.hpp
  HINTS /usr/local/include
)

# Both the headers and the library are required.
if(Caffe_INCLUDE_DIR)
  if(Caffe_LIBS)
    set(Caffe_FOUND 1)
  endif()
endif()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Find module for Glog, which is required for Caffe.
#
# Variables produced:
#   Glog_LIBS  - path of the glog library (searched with a /usr/local/lib hint)
#   Glog_FOUND - set to 1 when the library was located, otherwise left unset
unset(Glog_FOUND)

find_library(Glog_LIBS
  NAMES glog
  HINTS /usr/local/lib
)

# Only the library is probed; no header check is performed.
if(Glog_LIBS)
  set(Glog_FOUND 1)
endif()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Find module for Protobuf, which is required for Caffe.
#
# Variables produced:
#   Protobuf_LIBS  - path of the protobuf library (searched with a
#                    /usr/local/lib hint)
#   Protobuf_FOUND - set to 1 when the library was located, otherwise left unset
unset(Protobuf_FOUND)

find_library(Protobuf_LIBS
  NAMES protobuf
  HINTS /usr/local/lib
)

# Only the library is probed; no header check is performed.
if(Protobuf_LIBS)
  set(Protobuf_FOUND 1)
endif()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Find module for the Tesseract OCR engine and the Leptonica library it is
# linked together with.
#
# Variables produced:
#   Tesseract_INCLUDE_DIR - directory containing tesseract/baseapi.h
#   Tesseract_LIBRARY     - path of the tesseract library
#   Lept_LIBRARY          - path of the lept (Leptonica) library
#   Tesseract_LIBS        - list of both libraries, for linking
#   Tesseract_FOUND       - set to 1 only when the header and BOTH libraries
#                           were located, otherwise left unset
unset(Tesseract_FOUND)

find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
  HINTS
  /usr/include
  /usr/local/include)

find_library(Tesseract_LIBRARY NAMES tesseract
  HINTS
  /usr/lib
  /usr/local/lib)

find_library(Lept_LIBRARY NAMES lept
  HINTS
  /usr/lib
  /usr/local/lib)

set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})
# Check every component on its own. Testing the combined Tesseract_LIBS list
# is unreliable: if() only treats a value as false when it *ends* in
# -NOTFOUND, so "Tesseract_LIBRARY-NOTFOUND;/usr/lib/liblept.so" would count
# as found even though tesseract itself is missing.
if(Tesseract_LIBRARY AND Lept_LIBRARY AND Tesseract_INCLUDE_DIR)
  set(Tesseract_FOUND 1)
endif()
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,3 +47,145 @@ Notes | |
2. Tesseract's configure script may fail to detect leptonica, so you may have to edit the configure script: comment out the if's around this message and retain only the "then" branch. | ||
|
||
3. You are encouraged to search the Net for some better pre-trained classifiers, as well as classifiers for other languages. | ||
|
||
|
||
Word spotting CNN | ||
================= | ||
|
||
Intro | ||
----- | ||
|
||
A word spotting CNN is a CNN that takes an image assumed to contain a single word and provides a probability over a given vocabulary. | ||
Although other backends will be supported, for the moment only the Caffe backend is supported. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This sentence needs to be updated to include the DNN backend. |
||
|
||
|
||
|
||
|
||
Installation of Caffe backend | ||
----------------------------- | ||
The caffe wrapping backend has the requirements caffe does. | ||
* Caffe can be built against OpenCV; if the caffe backend is enabled, a circular dependency arises. | ||
The simplest solution is to build caffe without support for OpenCV. | ||
* Only the operating systems supported by Caffe are supported by the backend. | ||
The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. | ||
Other UNIX systems including OSX should be easy to adapt. | ||
|
||
Sample script for building Caffe | ||
|
||
```bash | ||
#!/bin/bash | ||
SRCROOT="${HOME}/caffe_inst/" | ||
mkdir -p "$SRCROOT" | ||
cd "$SRCROOT" | ||
git clone https://github.com/BVLC/caffe.git | ||
cd caffe | ||
git checkout 91b09280f5233cafc62954c98ce8bc4c204e7475 | ||
git branch 91b09280f5233cafc62954c98ce8bc4c204e7475 | ||
cat Makefile.config.example > Makefile.config | ||
echo 'USE_OPENCV := 0' >> Makefile.config | ||
echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config | ||
echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config | ||
|
||
|
||
echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 | ||
+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 | ||
@@ -234,6 +234,7 @@ | ||
|
||
template <typename T> | ||
friend class Net; | ||
+ virtual ~Callback(){} | ||
}; | ||
const vector<Callback*>& before_forward() const { return before_forward_; } | ||
void add_before_forward(Callback* value) { | ||
">/tmp/cleanup_caffe.diff | ||
|
||
patch < /tmp/cleanup_caffe.diff | ||
|
||
|
||
make -j 6 | ||
|
||
make pycaffe | ||
|
||
make distribute | ||
``` | ||
|
||
|
||
```bash | ||
#!/bin/bash | ||
cd $OPENCV_BUILD_DIR #You must set this | ||
CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04 | ||
|
||
cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The extra modules path has There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||
|
||
|
||
``` | ||
|
||
Text Detection CNN | ||
================== | ||
|
||
Intro | ||
----- | ||
|
||
A text detection CNN is a CNN that takes an image which may contain multiple words. It outputs a list of bounding boxes (Rects), each with the probability that it contains text. | ||
Although other backends will be supported, for the moment only the Caffe backend is supported. | ||
|
||
|
||
|
||
|
||
Installation of Caffe backend | ||
----------------------------- | ||
* Please note that a custom Caffe based on the SSD branch is required; the link to this custom Caffe is provided below. | ||
The caffe wrapping backend has the requirements caffe does. | ||
* Caffe can be built against OpenCV; if the caffe backend is enabled, a circular dependency arises. | ||
The simplest solution is to build caffe without support for OpenCV. | ||
* Only the operating systems supported by Caffe are supported by the backend. | ||
The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. | ||
Other UNIX systems including OSX should be easy to adapt. | ||
|
||
Sample script for building Caffe | ||
|
||
```bash | ||
#!/bin/bash | ||
SRCROOT="${HOME}/caffe_inst/" | ||
mkdir -p "$SRCROOT" | ||
cd "$SRCROOT" | ||
git clone https://github.com/sghoshcvc/TextBoxes.git | ||
cd TextBoxes | ||
cat Makefile.config.example > Makefile.config | ||
echo 'USE_OPENCV := 0' >> Makefile.config | ||
echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config | ||
echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config | ||
|
||
|
||
echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 | ||
+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 | ||
@@ -234,6 +234,7 @@ | ||
|
||
template <typename T> | ||
friend class Net; | ||
+ virtual ~Callback(){} | ||
}; | ||
const vector<Callback*>& before_forward() const { return before_forward_; } | ||
void add_before_forward(Callback* value) { | ||
">/tmp/cleanup_caffe.diff | ||
|
||
patch < /tmp/cleanup_caffe.diff | ||
|
||
|
||
make -j 6 | ||
|
||
make pycaffe | ||
|
||
make distribute | ||
``` | ||
|
||
|
||
```bash | ||
#!/bin/bash | ||
cd $OPENCV_BUILD_DIR #You must set this | ||
CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04 | ||
|
||
cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./ | ||
|
||
|
||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,7 @@ the use of this software, even if advised of the possibility of such damage. | |
|
||
#include "opencv2/text/erfilter.hpp" | ||
#include "opencv2/text/ocr.hpp" | ||
#include "opencv2/text/textDetector.hpp" | ||
|
||
/** @defgroup text Scene Text Detection and Recognition | ||
|
||
|
@@ -92,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D | |
in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping). | ||
|
||
To see the text detector at work, have a look at the textdetection demo: | ||
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp> | ||
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp> | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this link may be invalid. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The link is valid There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But it's better to remove all the occurrences of itseez. It doesn't exist anymore and all such links are redirected to opencv repo. |
||
@defgroup text_recognize Scene Text Recognition | ||
@} | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This variable is unused. Also all the commented code should be removed form the cmake script.