Skip to content

Commit 765aae2

Browse files
NastyBoget, sunveil, alexander1999-hub, Alexander Golodkov, Travvy88
authored
new version 2.2.7 (#486)
Co-authored-by: Andrey Mikhailov <mikhailov@icc.ru> Co-authored-by: Alexander Golodkov <55749660+alexander1999-hub@users.noreply.github.com> Co-authored-by: Alexander Golodkov <golodkov@ispras.ru> Co-authored-by: Nikita Shevtsov <61932814+Travvy88@users.noreply.github.com> Co-authored-by: Nikita Shevtsov <shevtsov@ispras.ru> Co-authored-by: Oksana Belyaeva <belyaeva@ispras.ru>
1 parent d67e6ef commit 765aae2

File tree

79 files changed

+1187
-768
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+1187
-768
lines changed

.github/workflows/test_labeling.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,4 @@ jobs:
3131
python-version: '3.9'
3232
- name: Run tests for labeling
3333
run: |
34-
test="true" docker-compose -f labeling/docker-compose.yml up --build --exit-code-from test
34+
test="true" docker compose -f labeling/docker-compose.yml up --build --exit-code-from test

.github/workflows/test_on_push.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,4 @@ jobs:
3636
flake8 .
3737
- name: Run tests
3838
run: |
39-
test="true" docker-compose up --build --exit-code-from test
39+
test="true" docker compose up --build --exit-code-from test

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ It extracts a document’s logical structure and content: tables, text formattin
1717
The document’s content is represented as a tree storing headings and lists of any level.
1818
Dedoc can be integrated in a document contents and structure analysis system as a separate module.
1919

20-
## Workflow
20+
## Star History
21+
[![Star History Chart](https://api.star-history.com/svg?repos=ispras/dedoc&type=Date)](https://star-history.com/#ispras/dedoc&Date)
2122

23+
## Workflow
2224
![Workflow](https://github.com/ispras/dedoc/raw/master/docs/source/_static/workflow.png)
2325

2426
Workflow description is given [`here`](https://dedoc.readthedocs.io/en/latest/?badge=latest#workflow)
@@ -136,12 +138,12 @@ cd dedoc
136138

137139
### 3. Build the image and run the application
138140
```shell
139-
docker-compose up --build
141+
docker compose up --build
140142
```
141143

142144
### 4. Run container with tests
143145
```shell
144-
test="true" docker-compose up --build
146+
test="true" docker compose up --build
145147
```
146148

147149
If you need to change some application settings, you may update `config.py` according to your needs and re-build the image.

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.2.6
1+
2.2.7

dedoc/api/api_args.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ class QueryParameters:
2828
# pdf handling
2929
pdf_with_text_layer: str = Form("auto_tabby", enum=["true", "false", "auto", "auto_tabby", "tabby"],
3030
description="Extract text from a text layer of PDF or using OCR methods for image-like documents")
31+
fast_textual_layer_detection: str = Form("false", enum=["true", "false"],
32+
description="Use non-ML solution to detect textual layer. Much faster but less accurate.")
3133
language: str = Form("rus+eng", description="Recognition language ('rus+eng', 'rus', 'eng', 'fra', 'spa')")
3234
pages: str = Form(":", description='Page numbers range for reading PDF or images, "left:right" means read pages from left to right')
3335
is_one_column_document: str = Form("auto", enum=["auto", "true", "false"],

dedoc/api/web/index.html

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@ <h4>PDF handling</h4>
128128
</label>
129129
</p>
130130

131+
<p>
132+
<label><input name="fast_textual_layer_detection" type="checkbox" value="true"> fast_textual_layer_detection</label>
133+
</p>
134+
131135
<p>
132136
<label> language
133137
<input name="language" list="language" size="8" placeholder="rus+eng">

dedoc/data_structures/concrete_annotations/table_annotation.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ class TableAnnotation(Annotation):
88
"""
99
name = "table"
1010

11-
def __init__(self, name: str, start: int, end: int) -> None:
11+
def __init__(self, value: str, start: int, end: int) -> None:
1212
"""
13-
:param name: unique identifier of the table which is referenced inside this annotation
13+
:param value: unique identifier of the table which is referenced inside this annotation
1414
:param start: start of the annotated text (usually zero)
1515
:param end: end of the annotated text (usually end of the line)
1616
"""
17-
super().__init__(start=start, end=end, name=TableAnnotation.name, value=name, is_mergeable=False)
17+
super().__init__(start=start, end=end, name=TableAnnotation.name, value=value, is_mergeable=False)

dedoc/data_structures/line_with_meta.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,9 @@ def uid(self) -> str:
136136
def set_line(self, line: str) -> None:
137137
self._line = line
138138

139+
def set_metadata(self, metadata: LineMetadata) -> None:
140+
self._metadata = metadata
141+
139142
def __repr__(self) -> str:
140143
return (f"LineWithMeta({self.line[:65]}, "
141144
f"tagHL={self.metadata.tag_hierarchy_level.level_1, self.metadata.tag_hierarchy_level.level_2, self.metadata.tag_hierarchy_level.line_type})")

dedoc/download_models.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
"""
77
model_hash_dict = dict(
88
txtlayer_classifier="9ca1de749d8d37147b00a3a228e03ee1776c695f",
9-
scan_orientation_efficient_net_b0="9ea283f3d346ae4fdd82463a9f60b5369a3ffb58",
9+
scan_orientation_efficient_net_b0="c60812552a1be624476c1e5b58599867b36f8d4e",
1010
font_classifier="db4481ad60ab050cbb42079b64f97f9e431feb07",
11-
paragraph_classifier="c26a10193499d3cbc77ffec9842bece24fa8950b",
12-
line_type_classifiers="0568c6e1f49612c0c351f10b80a26dc05f796683",
11+
paragraph_classifier="97c4b78bc20d87ec7d53389e09f1ca35c6ade067",
12+
line_type_classifiers="6ad0eacbfdea065b658cb6f039d13f75245d51ae",
1313
fintoc_classifiers="6a907b7d2437c3f61ac9c506f67175207982fae8"
1414
)
1515

dedoc/readers/article_reader/article_reader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def __create_line_with_refs(self, content: List[Tuple[str, Tag]], bib2uid: dict,
235235
if subpart.get("type") == "bibr" and target in bib2uid:
236236
annotations.append(ReferenceAnnotation(value=bib2uid[target], start=start, end=start + len(sub_text)))
237237
if subpart.get("type") == "table" and target in table2uid:
238-
annotations.append(TableAnnotation(name=table2uid[target], start=start, end=start + len(sub_text)))
238+
annotations.append(TableAnnotation(value=table2uid[target], start=start, end=start + len(sub_text)))
239239
if subpart.get("type") == "figure" and target in attachment2uid:
240240
annotations.append(AttachAnnotation(attach_uid=attachment2uid[target], start=start, end=start + len(sub_text)))
241241
else:

0 commit comments

Comments
 (0)