|
30 | 30 | # Any additional custom configuration flags that are not available via the tesseract function. For example: config='--psm 6' |
31 | 31 | TESSERACT_CUSTOM_CONFIG_FLAGS = os.environ.get("OCR_SERVICE_TESSERACT_CUSTOM_CONFIG_FLAGS", "") |
32 | 32 |
|
33 | | -# controls both threads and cpus for one WEB SERVICE thread, basically, one request handler. |
34 | | -# this is derived normally from the amount of threads Gunicorn is running with, for example: |
| 33 | +# Controls both threads and cpus for one WEB SERVICE thread, basically, one request handler. |
| 34 | +# This is derived normally from the amount of threads Gunicorn is running with, for example: |
35 | 35 | # - if we have OCR_SERVICE_THREADS = 4, the OCR service can handle at most 4 requests at the same time, |
36 | 36 | # this means that you cannot use all of your CPUs for OCR-ing for 1 request, |
37 | 37 | # because that means the other requests are sitting idle while the first one uses all resources, |
38 | 38 | # and so it is recommended to regulate the number of threads per request |
39 | | -OCR_WEB_SERVICE_THREADS = int(os.environ.get("OCR_WEB_SERVICE_THREADS", multiprocessing.cpu_count())) |
| 39 | +OCR_WEB_SERVICE_THREADS = int(os.environ.get("OCR_WEB_SERVICE_THREADS", 1)) |
| 40 | + |
| 41 | +# This controls the number of workers the OCR service may have. It is recommended to use this value |
| 42 | +# instead of OCR_WEB_SERVICE_THREADS if you want to process multiple requests in parallel. |
| 43 | +# WARNING: using more than 1 worker assumes you have set the OCR_WEB_SERVICE_WORKER_CLASS setting to sync! |
| 44 | +# Note that setting OCR_WEB_SERVICE_WORKER_CLASS to sync means that a worker will use 1 THREAD only, |
| 45 | +# therefore OCR_WEB_SERVICE_THREADS is disregarded. |
| 46 | +OCR_WEB_SERVICE_WORKERS = int(os.environ.get("OCR_WEB_SERVICE_WORKERS", 1)) |
40 | 47 |
|
41 | 48 | # set this to control the number of threads used for OCR-ing per web request thread (check OCR_WEB_SERVICE_THREADS) |
42 | | -CPU_THREADS = int(os.environ.get("OCR_SERVICE_CPU_THREADS", (multiprocessing.cpu_count() / OCR_WEB_SERVICE_THREADS))) |
| 49 | +CPU_THREADS = int(os.environ.get("OCR_SERVICE_CPU_THREADS", (multiprocessing.cpu_count() / OCR_WEB_SERVICE_WORKERS))) |
43 | 50 |
|
44 | 51 | # conversion thread number for the pdf -> PIL img conversion, per web request thread (check OCR_WEB_SERVICE_THREADS) |
45 | | -CONVERTER_THREAD_NUM = int(os.environ.get("OCR_SERVICE_CONVERTER_THREADS", (multiprocessing.cpu_count() / OCR_WEB_SERVICE_THREADS))) |
| 52 | +CONVERTER_THREAD_NUM = int(os.environ.get("OCR_SERVICE_CONVERTER_THREADS", (multiprocessing.cpu_count() / OCR_WEB_SERVICE_WORKERS))) |
46 | 53 |
|
47 | 54 | # should we convert detected images to greyscale before OCR-ing |
48 | 55 | OCR_CONVERT_GRAYSCALE_IMAGES = True |
|
62 | 69 | # a libre office server will only use 1 CPU by default (not changeable), thus, |
63 | 70 | # for handling multiple requests, we will have one service per OCR_WEB_SERVICE_THREAD |
64 | 71 | DEFAULT_LIBRE_OFFICE_SERVER_PORT = 9900 |
65 | | -LIBRE_OFFICE_LISTENER_PORT_RANGE = range(DEFAULT_LIBRE_OFFICE_SERVER_PORT, DEFAULT_LIBRE_OFFICE_SERVER_PORT + OCR_WEB_SERVICE_THREADS) |
| 72 | +LIBRE_OFFICE_LISTENER_PORT_RANGE = range(DEFAULT_LIBRE_OFFICE_SERVER_PORT, DEFAULT_LIBRE_OFFICE_SERVER_PORT + OCR_WEB_SERVICE_THREADS + OCR_WEB_SERVICE_WORKERS) |
66 | 73 |
|
67 | 74 | LIBRE_OFFICE_NETWORK_INTERFACE = "localhost" |
68 | 75 |
|
| 76 | + |
| 77 | +# interval, in seconds, between checks for a possible port failure
69 | 78 | LIBRE_OFFICE_PROCESSES_LISTENER_INTERVAL = 10 |
70 | 79 |
|
71 | 80 |
|
|
0 commit comments