2 files changed: +2 -2 lines changed
Original file line number | Diff line number | Diff line change
5
5
all : all-tess all-hunspell
6
6
7
7
install :
8
- sudo apt-get install sqlite3 wget
8
+ sudo apt-get install sqlite3 wget icu-devtools
9
9
sudo add-apt-repository -u -y ppa:alex-p/tesseract-ocr
10
10
sudo apt-get update
11
11
sudo apt-get install tesseract-ocr
Original file line number Diff line number Diff line change 1
1
#! /bin/sh
2
- sqlite3 -list -readonly -noheader ${1:-lexdb.sqlite} << EOF | grep -v -e '^[[:punct:]]' -e '^[[:digit:][:punct:]]*$' > ${2:-lexdb_${3:-100}.words}
2
+ sqlite3 -list -readonly -noheader ${1:-lexdb.sqlite} << EOF | uconv -f utf-8 -t utf-8 -x "::nfc;" | grep -v -e '^[[:punct:]]' -e '^[[:digit:][:punct:]]*$' > ${2:-lexdb_${3:-100}.words}
3
3
select trim(u) from csv where f > ${3:-100} and p != "\$(" and p != "\$," and p != "\$." and p != "FM.xy" and p != "CARD" and p != "XY";
4
4
EOF
You can’t perform that action at this time.
0 commit comments