Skip to content

Commit 087c50d

Browse files
committed
Fix: docker images
1 parent 1eacb56 commit 087c50d

File tree

3 files changed

+19
-12
lines changed

3 files changed

+19
-12
lines changed

Dockerfile

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11

22
FROM ubuntu:21.04
33

4-
# Cópia de arquivos do projeto OCR-SERVER
5-
COPY usr/local/bin/ocr /usr/local/bin/ocr
6-
COPY etc/init.d/ocr-ubuntu /etc/init.d/ocr
7-
COPY entrypoint.sh /entrypoint.sh
8-
94
WORKDIR /tmp
105

116
ENV TZ=Etc/UTC
@@ -14,16 +9,23 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
149
# Instalação dos pacotes pré-requisitos do ocr-server 2
1510
RUN apt-get -y update && \
1611
apt-get install -y tesseract-ocr tesseract-ocr-por tesseract-ocr-eng tesseract-ocr-spa leptonica-progs \
17-
poppler-utils pdftk unpaper ocaml ghostscript imagemagick libcamlpdf-ocaml \
12+
poppler-utils pdftk unpaper ocaml ghostscript imagemagick libcamlpdf-ocaml rsyslog \
1813
wget perl libfile-find-rule-perl libfile-touch-perl libunix-syslog-perl
1914

2015
RUN wget \
2116
https://raw.githubusercontent.com/coherentgraphics/cpdf-binaries/master/Linux-Intel-64bit/cpdf \
22-
-o /usr/local/bin/cpdf && \
17+
-O /usr/local/bin/cpdf && \
2318
chmod 755 /usr/local/bin/cpdf
2419

20+
RUN update-rc.d rsyslog defaults
21+
2522
RUN useradd -m ocr
2623

24+
# Cópia de arquivos do projeto OCR-SERVER
25+
COPY usr/local/bin/ocr /usr/local/bin/ocr
26+
COPY etc/init.d/ocr-ubuntu /etc/init.d/ocr
27+
COPY entrypoint.sh /entrypoint.sh
28+
2729
RUN chmod +x /usr/local/bin/ocr && \
2830
chmod +x /etc/init.d/ocr && \
2931
update-rc.d ocr defaults

entrypoint.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env bash
22

33
# Inicializa serviço de log
4-
/etc/init.d/rsyslog start
4+
service rsyslog start
55

66
# Cria estrutura de pastas para monitoramento de arquivos
77
mkdir -p /var/ocr-server/
@@ -14,4 +14,7 @@ chmod -R 777 /var/ocr-server
1414
# Iniciar serviço do OCR-Server
1515
service ocr start
1616

17-
tail -f /var/log/syslog
17+
while [ 1 ]; do
18+
tail -f /var/log/syslog
19+
sleep 1;
20+
done

usr/local/bin/ocr

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,10 @@
4444
# Now using the '--oem 1' option to tesseract as there is an issue with default OS trained data
4545
# Fix: detection of files with unicode filenames
4646
# Fix: now uses the current shell to spawn processes; plain open3 was not using the default shell env, which is needed for java progs
47-
# 2.1.1 Fixed docker on Ubuntu 20.04
47+
# 2.1.1 Fixed docker on Ubuntu 21.04
4848
# Color conversion to gray is off by default
4949
# Fix: new images format from INPE samples
50+
# Fix: now uses default tesseract algorithms
5051
#
5152
# TODO: - Changes get_imgs and OCR processing to enable pages with more than one image -- it
5253
# would not work on previous versions that assumed #pages = #imgs. Version 1.0.1 counts them
@@ -99,8 +100,8 @@ my $COLOR_THRES = .03; # Min color spread, below this value, will convert image
99100
# Command dependencies
100101

101102
# depends on tesseract-ocr and tesseract-ocr-por 3.05-dev or higher -- for pdf/a Tesseract 4.0 is recommended
102-
my $TESSERACT = 'tesseract --oem 1'; # if Tesseract => 4.0
103-
#my $TESSERACT = 'tesseract'; # if Tesseract < 4.0
103+
#my $TESSERACT = 'tesseract --oem 2'; # if Tesseract => 4.0
104+
my $TESSERACT = 'tesseract'; # if Tesseract < 4.0
104105

105106
# Depends on pdftk 2.02 or higher
106107
my $PDFTK = 'pdftk';
@@ -142,6 +143,7 @@ my %SUB_DIRS = ( 'IN'=>'Entrada', 'OUT'=>'Saida', 'PROC'=>'Originais_Processados
142143
# Safeguard in case cpuinfo has not correctly identified the number of CPUs
143144
$MAX_PGS = ($MAX_PGS==0) ? 4 : $MAX_PGS;
144145

146+
$ENV{'SHELL'} = exists $ENV{'SHELL'} ? $ENV{'SHELL'} : '/bin/bash';
145147
$ENV{'PATH'} = '/usr/local/bin:/usr/bin:/bin';
146148
$ENV{'IFS'} = '\t\n';
147149

0 commit comments

Comments
 (0)