ispras
diff --git a/‎.flake8‎
Lines changed: 2 additions & 1 deletion b/‎.flake8‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎.github/workflows/docs.yaml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/docs.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 7 additions & 0 deletions b/‎README.md‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎VERSION‎
Lines changed: 1 addition & 1 deletion b/‎VERSION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dedoc/api/api_args.py‎
Lines changed: 2 additions & 0 deletions b/‎dedoc/api/api_args.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎dedoc/api/schema/annotation.py‎
Lines changed: 10 additions & 0 deletions b/‎dedoc/api/schema/annotation.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎dedoc/api/schema/cell_with_meta.py‎
Lines changed: 10 additions & 0 deletions b/‎dedoc/api/schema/cell_with_meta.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎dedoc/api/schema/document_content.py‎
Lines changed: 6 additions & 0 deletions b/‎dedoc/api/schema/document_content.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎dedoc/api/schema/document_metadata.py‎
Lines changed: 20 additions & 0 deletions b/‎dedoc/api/schema/document_metadata.py‎
Lines changed: 20 additions & 0 deletions
@@ -28,6 +28,7 @@ exclude =
     *__init__.py,
     resources,
     venv,
+    .venv,
     build,
     dedoc.egg-info,
     docs/_build,
@@ -48,5 +49,5 @@ per-file-ignores =
     scripts/*:T201
     scripts/benchmark_pdf_performance*:JS101
     tests/custom_test_runner.py:ANN001,ANN201,ANN202,ANN204,N802
-    docs/source/_static/code_examples/*:I251
+    docs/source/_static/code_examples/*:I251,T201
     docs/source/_static/code_examples/langchain/*:FOL001,FOL002,FOL003,FOL004,FOL005,I100,I202,I251
@@ -33,3 +33,4 @@ jobs:
         python dedoc_usage_tutorial.py
         python dedoc_add_new_doc_type_tutorial.py
         python dedoc_add_new_structure_type_tutorial.py
+        python dedoc_using_patterns_tutorial.py
@@ -3,7 +3,7 @@ repos:
     rev: 5.0.4
     hooks:
     -   id: flake8
-        exclude: \.github|.*__init__\.py|resources|docs|venv|build|dedoc\.egg-info|scripts/fintoc2022/metric.py
+        exclude: \.github|.*__init__\.py|resources|docs|venv|\.venv|build|dedoc\.egg-info|scripts/fintoc2022/metric.py
         args:
             - "--config=.flake8"
         additional_dependencies: [
 
@@ -1,5 +1,6 @@
 # Dedoc
 
+[![Telegram](https://img.shields.io/badge/chat-on%20Telegram-2ba2d9.svg)](https://t.me/dedoc_chat)
 [![image](https://img.shields.io/pypi/pyversions/dedoc.svg)](https://pypi.python.org/pypi/dedoc)
 [![GitHub release](https://img.shields.io/github/release/ispras/dedoc.svg)](https://github.com/ispras/dedoc/releases/)
 [![PyPI version](https://badge.fury.io/py/dedoc.svg)](https://badge.fury.io/py/dedoc)
@@ -94,6 +95,12 @@ Relevant documentation of dedoc is available [here](https://dedoc.readthedocs.io
 * Article on habr.com [Dedoc: как автоматически извлечь из текстового документа всё и даже немного больше](https://habr.com/ru/companies/isp_ras/articles/779390/) in Russian (2023)
 * Article [Dedoc: A Universal System for Extracting Content and Logical Structure From Textual Documents](https://ieeexplore.ieee.org/abstract/document/10508151/) in English (2023)
 
+# Join Our Community
+
+Have questions or want to discuss Dedoc? Join our [Telegram chat](https://t.me/dedoc_chat) and connect with the community and the developers.
+
+Join our [Telegram channel](https://t.me/dedoc_channel) to get notifications about the most recent updates.
+
 # Installation instructions
 
 This project has a REST api and you can run it in Docker container.
 
@@ -1 +1 @@
-2.2.7
+2.3
@@ -8,6 +8,7 @@
 class QueryParameters:
     # type of document structure parsing
     document_type: str = Form("", enum=["", "law", "tz", "diploma", "article", "fintoc"], description="Document domain")
+    patterns: str = Form("", description='Patterns for default document type (when document_type="")')
     structure_type: str = Form("tree", enum=["linear", "tree"], description="Output structure type")
     return_format: str = Form("json", enum=["json", "html", "plain_text", "tree", "collapsed_tree", "ujson", "pretty_json"],
                               description="Response representation, most types (except json) are used for debug purposes only")
@@ -39,6 +40,7 @@ class QueryParameters:
                                                  '"no_change" - set vertical orientation of the document without using an orientation classifier')
     need_header_footer_analysis: str = Form("false", enum=["true", "false"], description="Exclude headers and footers from PDF parsing result")
     need_binarization: str = Form("false", enum=["true", "false"], description="Binarize document pages (for images or PDF without a textual layer)")
+    need_gost_frame_analysis: str = Form("false", enum=["true", "false"], description="Parameter for detecting and ignoring GOST frame of the document")
 
     # other formats handling
     delimiter: Optional[str] = Form(None, description="Column separator for CSV files")
 
@@ -5,6 +5,16 @@ class Annotation(BaseModel):
     """
     The piece of information about the text line: its appearance or links to another document object.
     For example Annotation(1, 13, "italic", "True") says that text between 1st and 13th symbol was written in italic.
+
+    :ivar start: start of the annotated text
+    :ivar end: end of the annotated text (end isn't included)
+    :ivar name: annotation's name, specific for each type of annotation
+    :ivar value: information about annotated text, depends on the type of annotation, e.g. "True"/"False", "10.0", etc.
+
+    :vartype start: int
+    :vartype end: int
+    :vartype name: str
+    :vartype value: str
     """
     start: int = Field(description="Start of the annotated text", example=0)
     end: int = Field(description="End of the annotated text (end isn't included)", example=5)
 
@@ -8,6 +8,16 @@
 class CellWithMeta(BaseModel):
     """
     Holds the information about the cell: list of lines and cell properties (rowspan, colspan, invisible).
+
+    :ivar lines: list of textual lines of the cell
+    :ivar colspan: number of columns to span (for cells merged horizontally)
+    :ivar rowspan: number of rows to span (for cells merged vertically)
+    :ivar invisible: indicator for displaying or hiding cell text - cells that are merged with others are hidden (for HTML display)
+
+    :vartype lines: List[LineWithMeta]
+    :vartype colspan: int
+    :vartype rowspan: int
+    :vartype invisible: bool
     """
     lines: List[LineWithMeta] = Field(description="Textual lines of the cell with annotations")
     rowspan: int = Field(description="Number of rows to span like in HTML format", example=1)
 
@@ -9,6 +9,12 @@
 class DocumentContent(BaseModel):
     """
     Content of the document - structured text and tables.
+
+    :ivar tables: list of document tables
+    :ivar structure: tree structure of the document nodes with text and additional metadata
+
+    :vartype tables: List[Table]
+    :vartype structure: TreeNode
     """
     structure: TreeNode = Field(description="Tree structure where content of the document is organized")
     tables: List[Table] = Field(description="List of document tables")
@@ -4,6 +4,26 @@
 class DocumentMetadata(BaseModel):
     """
     Document metadata like its name, size, author, etc.
+
+    :ivar file_name: original document name (before rename and conversion, so it can contain non-ASCII symbols, spaces and so on)
+    :ivar temporary_file_name: file name during parsing (unique name after rename and conversion)
+    :ivar size: size of the original file in bytes
+    :ivar modified_time: time of the last modification in unix time format (seconds since the epoch)
+    :ivar created_time: time of the creation in unix time format (seconds since the epoch)
+    :ivar access_time: time of the last access to the file in unix time format (seconds since the epoch)
+    :ivar file_type: mime type of the file
+    :ivar uid: document unique identifier (useful for attached files)
+
+    :vartype file_name: str
+    :vartype temporary_file_name: str
+    :vartype size: int
+    :vartype modified_time: int
+    :vartype created_time: int
+    :vartype access_time: int
+    :vartype file_type: str
+    :vartype uid: str
+
+    Additional variables may be added with other file metadata.
     """
     class Config:
         extra = Extra.allow