diff --git a/docs/extras/code_samples/proof_of_address_v1.txt b/docs/extras/code_samples/proof_of_address_v1.txt deleted file mode 100644 index 551ff05d..00000000 --- a/docs/extras/code_samples/proof_of_address_v1.txt +++ /dev/null @@ -1,17 +0,0 @@ -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. -result: PredictResponse = mindee_client.parse(product.ProofOfAddressV1, input_doc) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) diff --git a/docs/extras/code_samples/us_w9_v1.txt b/docs/extras/code_samples/us_w9_v1.txt deleted file mode 100644 index 9290950a..00000000 --- a/docs/extras/code_samples/us_w9_v1.txt +++ /dev/null @@ -1,17 +0,0 @@ -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. -result: PredictResponse = mindee_client.parse(product.us.W9V1, input_doc) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) diff --git a/docs/extras/guide/proof_of_address_v1.md b/docs/extras/guide/proof_of_address_v1.md deleted file mode 100644 index 8099d5b2..00000000 --- a/docs/extras/guide/proof_of_address_v1.md +++ /dev/null @@ -1,208 +0,0 @@ ---- -title: Proof of Address OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-proof-of-address-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Proof of Address API](https://platform.mindee.com/mindee/proof_of_address). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/proof_of_address/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Proof of Address sample](https://github.com/mindee/client-lib-test-data/blob/main/products/proof_of_address/default_sample.jpg?raw=true) - -# Quick-Start -```py -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. -result: PredictResponse = mindee_client.parse(product.ProofOfAddressV1, input_doc) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 5d2361e9-405e-4fc1-8531-f92a3aef0c38 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/proof_of_address v1.1 -:Rotation applied: Yes - -Prediction -========== -:Locale: en; en; USD; -:Issuer Name: PPL ELECTRIC UTILITIES -:Issuer Company Registrations: -:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175 -:Recipient Name: -:Recipient Company Registrations: -:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062 -:Dates: 2011-07-27 - 2011-07-06 - 2011-08-03 - 2011-07-27 - 2011-06-01 - 2011-07-01 - 2010-07-01 - 2010-08-01 - 2011-07-01 - 2009-08-01 - 2010-07-01 - 2011-07-27 -:Date of Issue: 2011-07-27 - -Page Predictions -================ - -Page 0 ------- -:Locale: en; en; USD; -:Issuer Name: PPL ELECTRIC UTILITIES -:Issuer Company Registrations: -:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175 -:Recipient Name: -:Recipient Company Registrations: -:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062 -:Dates: 2011-07-27 - 2011-07-06 - 2011-08-03 - 2011-07-27 - 2011-06-01 - 2011-07-01 - 2010-07-01 - 2010-08-01 - 2011-07-01 - 2009-08-01 - 2010-07-01 - 2011-07-27 -:Date of Issue: 2011-07-27 -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### CompanyRegistrationField -Aside from the basic `BaseField` attributes, the company registration field `CompanyRegistrationField` also implements the following: - -* **type** (`str`): the type of company. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### LocaleField -The locale field `LocaleField` only implements the **value**, **confidence** and **page_id** base `BaseField` attributes, but it comes with its own: - -* **language** (`str`): ISO 639-1 language code (e.g.: `en` for English). Can be `None`. -* **country** (`str`): ISO 3166-1 alpha-2 or ISO 3166-1 alpha-3 code for countries (e.g.: `GRB` or `GB` for "Great Britain"). Can be `None`. -* **currency** (`str`): ISO 4217 code for currencies (e.g.: `USD` for "US Dollars"). Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Proof of Address V1: - -## Date of Issue -**date** ([DateField](#datefield)): The date the document was issued. - -```py -print(result.document.inference.prediction.date.value) -``` - -## Dates -**dates** (List[[DateField](#datefield)]): List of dates found on the document. - -```py -for dates_elem in result.document.inference.prediction.dates: - print(dates_elem.value) -``` - -## Issuer Address -**issuer_address** ([StringField](#stringfield)): The address of the document's issuer. - -```py -print(result.document.inference.prediction.issuer_address.value) -``` - -## Issuer Company Registrations -**issuer_company_registration** (List[[CompanyRegistrationField](#companyregistrationfield)]): List of company registrations found for the issuer. - -```py -for issuer_company_registration_elem in result.document.inference.prediction.issuer_company_registration: - print(issuer_company_registration_elem.value) -``` - -## Issuer Name -**issuer_name** ([StringField](#stringfield)): The name of the person or company issuing the document. - -```py -print(result.document.inference.prediction.issuer_name.value) -``` - -## Locale -**locale** ([LocaleField](#localefield)): The locale detected on the document. - -```py -print(result.document.inference.prediction.locale.value) -``` - -## Recipient Address -**recipient_address** ([StringField](#stringfield)): The address of the recipient. - -```py -print(result.document.inference.prediction.recipient_address.value) -``` - -## Recipient Company Registrations -**recipient_company_registration** (List[[CompanyRegistrationField](#companyregistrationfield)]): List of company registrations found for the recipient. - -```py -for recipient_company_registration_elem in result.document.inference.prediction.recipient_company_registration: - print(recipient_company_registration_elem.value) -``` - -## Recipient Name -**recipient_name** ([StringField](#stringfield)): The name of the person or company receiving the document. - -```py -print(result.document.inference.prediction.recipient_name.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/us_w9_v1.md b/docs/extras/guide/us_w9_v1.md deleted file mode 100644 index bbd15bfb..00000000 --- a/docs/extras/guide/us_w9_v1.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: US W9 OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-us-w9-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [W9 API](https://platform.mindee.com/mindee/us_w9). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/us_w9/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![W9 sample](https://github.com/mindee/client-lib-test-data/blob/main/products/us_w9/default_sample.jpg?raw=true) - -# Quick-Start -```py -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -# The endpoint name must be specified since it cannot be determined from the class. -result: PredictResponse = mindee_client.parse(product.us.W9V1, input_doc) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: d7c5b25f-e0d3-4491-af54-6183afa1aaab -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/us_w9 v1.0 -:Rotation applied: Yes - -Prediction -========== - -Page Predictions -================ - -Page 0 ------- -:Name: Stephen W Hawking -:SSN: 560758145 -:Address: Somewhere In Milky Way -:City State Zip: Probably Still At Cambridge P O Box CB1 -:Business Name: -:EIN: 942203664 -:Tax Classification: individual -:Tax Classification Other Details: -:W9 Revision Date: august 2013 -:Signature Position: Polygon with 4 points. -:Signature Date Position: -:Tax Classification LLC: -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### PositionField -The position field `PositionField` does not implement all the basic `BaseField` attributes, only **bounding_box**, **polygon** and **page_id**. On top of these, it has access to: - -* **rectangle** (`[Point, Point, Point, Point]`): a Polygon with four points that may be oriented (even beyond canvas). -* **quadrangle** (`[Point, Point, Point, Point]`): a free polygon made up of four points. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Page-Level Fields -Some fields are constrained to the page level, and so will not be retrievable at document level. - -# Attributes -The following fields are extracted for W9 V1: - -## Address -[📄](#page-level-fields "This field is only present on individual pages.")**address** ([StringField](#stringfield)): The street address (number, street, and apt. or suite no.) of the applicant. - -```py -for address_elem in result.document.address: - print(address_elem.value) -``` - -## Business Name -[📄](#page-level-fields "This field is only present on individual pages.")**business_name** ([StringField](#stringfield)): The business name or disregarded entity name, if different from Name. - -```py -for business_name_elem in result.document.business_name: - print(business_name_elem.value) -``` - -## City State Zip -[📄](#page-level-fields "This field is only present on individual pages.")**city_state_zip** ([StringField](#stringfield)): The city, state, and ZIP code of the applicant. - -```py -for city_state_zip_elem in result.document.city_state_zip: - print(city_state_zip_elem.value) -``` - -## EIN -[📄](#page-level-fields "This field is only present on individual pages.")**ein** ([StringField](#stringfield)): The employer identification number. - -```py -for ein_elem in result.document.ein: - print(ein_elem.value) -``` - -## Name -[📄](#page-level-fields "This field is only present on individual pages.")**name** ([StringField](#stringfield)): Name as shown on the applicant's income tax return. - -```py -for name_elem in result.document.name: - print(name_elem.value) -``` - -## Signature Date Position -[📄](#page-level-fields "This field is only present on individual pages.")**signature_date_position** ([PositionField](#positionfield)): Position of the signature date on the document. - -```py -for signature_date_position_elem in result.document.signature_date_position: - print(signature_date_position_elem.polygon) -``` - -## Signature Position -[📄](#page-level-fields "This field is only present on individual pages.")**signature_position** ([PositionField](#positionfield)): Position of the signature on the document. - -```py -for signature_position_elem in result.document.signature_position: - print(signature_position_elem.polygon) -``` - -## SSN -[📄](#page-level-fields "This field is only present on individual pages.")**ssn** ([StringField](#stringfield)): The applicant's social security number. - -```py -for ssn_elem in result.document.ssn: - print(ssn_elem.value) -``` - -## Tax Classification -[📄](#page-level-fields "This field is only present on individual pages.")**tax_classification** ([StringField](#stringfield)): The federal tax classification, which can vary depending on the revision date. - -```py -for tax_classification_elem in result.document.tax_classification: - print(tax_classification_elem.value) -``` - -## Tax Classification LLC -[📄](#page-level-fields "This field is only present on individual pages.")**tax_classification_llc** ([StringField](#stringfield)): Depending on revision year, among S, C, P or D for Limited Liability Company Classification. - -```py -for tax_classification_llc_elem in result.document.tax_classification_llc: - print(tax_classification_llc_elem.value) -``` - -## Tax Classification Other Details -[📄](#page-level-fields "This field is only present on individual pages.")**tax_classification_other_details** ([StringField](#stringfield)): Tax Classification Other Details. - -```py -for tax_classification_other_details_elem in result.document.tax_classification_other_details: - print(tax_classification_other_details_elem.value) -``` - -## W9 Revision Date -[📄](#page-level-fields "This field is only present on individual pages.")**w9_revision_date** ([StringField](#stringfield)): The Revision month and year of the W9 form. - -```py -for w9_revision_date_elem in result.document.w9_revision_date: - print(w9_revision_date_elem.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/product/proof_of_address_v1.rst b/docs/product/proof_of_address_v1.rst deleted file mode 100644 index 4260ff40..00000000 --- a/docs/product/proof_of_address_v1.rst +++ /dev/null @@ -1,15 +0,0 @@ -Proof of Address V1 -------------------- - -**Sample Code:** - -.. literalinclude:: /extras/code_samples/proof_of_address_v1.txt - :language: Python - -.. autoclass:: mindee.product.proof_of_address.proof_of_address_v1.ProofOfAddressV1 - :members: - :inherited-members: - -.. autoclass:: mindee.product.proof_of_address.proof_of_address_v1_document.ProofOfAddressV1Document - :members: - :inherited-members: diff --git a/docs/product/us/w9_v1.rst b/docs/product/us/w9_v1.rst deleted file mode 100644 index 0c7c8485..00000000 --- a/docs/product/us/w9_v1.rst +++ /dev/null @@ -1,19 +0,0 @@ -W9 V1 ------ - -**Sample Code:** - -.. literalinclude:: /extras/code_samples/us_w9_v1.txt - :language: Python - -.. autoclass:: mindee.product.us.w9.w9_v1.W9V1 - :members: - :inherited-members: - -.. autoclass:: mindee.product.us.w9.w9_v1_document.W9V1Document - :members: - :inherited-members: - -.. autoclass:: mindee.product.us.w9.w9_v1_page.W9V1Page - :members: - :inherited-members: diff --git a/mindee/commands/cli_products.py b/mindee/commands/cli_products.py index 900aedc6..68795b0a 100644 --- a/mindee/commands/cli_products.py +++ b/mindee/commands/cli_products.py @@ -154,12 +154,6 @@ class CommandConfig(Generic[TypeInference]): is_sync=True, is_async=False, ), - "proof-of-address": CommandConfig( - help="Proof of Address", - doc_class=product.ProofOfAddressV1, - is_sync=True, - is_async=False, - ), "receipt": CommandConfig( help="Receipt", doc_class=product.ReceiptV5, @@ -190,10 +184,4 @@ class CommandConfig(Generic[TypeInference]): is_sync=False, is_async=True, ), - "us-w9": CommandConfig( - help="W9", - doc_class=product.us.W9V1, - is_sync=True, - is_async=False, - ), } diff --git a/mindee/product/__init__.py b/mindee/product/__init__.py index 6d540745..f49443dd 100644 --- a/mindee/product/__init__.py +++ b/mindee/product/__init__.py @@ -129,10 +129,6 @@ from mindee.product.passport.passport_v1_document import ( PassportV1Document, ) -from mindee.product.proof_of_address.proof_of_address_v1 import ProofOfAddressV1 -from mindee.product.proof_of_address.proof_of_address_v1_document import ( - ProofOfAddressV1Document, -) from mindee.product.receipt.receipt_v5 import ReceiptV5 from mindee.product.receipt.receipt_v5_document import ( ReceiptV5Document, diff --git a/mindee/product/proof_of_address/__init__.py b/mindee/product/proof_of_address/__init__.py deleted file mode 100644 index f1fcdfdb..00000000 --- a/mindee/product/proof_of_address/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from mindee.product.proof_of_address.proof_of_address_v1 import ProofOfAddressV1 -from mindee.product.proof_of_address.proof_of_address_v1_document import ( - ProofOfAddressV1Document, -) diff --git a/mindee/product/proof_of_address/proof_of_address_v1.py b/mindee/product/proof_of_address/proof_of_address_v1.py deleted file mode 100644 index 4744d619..00000000 --- a/mindee/product/proof_of_address/proof_of_address_v1.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import List - -from mindee.parsing.common.inference import Inference -from mindee.parsing.common.page import Page -from mindee.parsing.common.string_dict import StringDict -from mindee.product.proof_of_address.proof_of_address_v1_document import ( - ProofOfAddressV1Document, -) - - -class ProofOfAddressV1(Inference): - """Proof of Address API version 1 inference prediction.""" - - prediction: ProofOfAddressV1Document - """Document-level prediction.""" - pages: List[Page[ProofOfAddressV1Document]] - """Page-level prediction(s).""" - endpoint_name = "proof_of_address" - """Name of the endpoint.""" - endpoint_version = "1" - """Version of the endpoint.""" - - def __init__(self, raw_prediction: StringDict): - """ - Proof of Address v1 inference. - - :param raw_prediction: Raw prediction from the HTTP response. - """ - super().__init__(raw_prediction) - - self.prediction = ProofOfAddressV1Document(raw_prediction["prediction"]) - self.pages = [] - for page in raw_prediction["pages"]: - try: - page_prediction = page["prediction"] - except KeyError: - continue - if page_prediction: - self.pages.append(Page(ProofOfAddressV1Document, page)) diff --git a/mindee/product/proof_of_address/proof_of_address_v1_document.py b/mindee/product/proof_of_address/proof_of_address_v1_document.py deleted file mode 100644 index 8ebbf710..00000000 --- a/mindee/product/proof_of_address/proof_of_address_v1_document.py +++ /dev/null @@ -1,104 +0,0 @@ -from typing import List, Optional - -from mindee.parsing.common.prediction import Prediction -from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.common.summary_helper import clean_out_string -from mindee.parsing.standard.company_registration import CompanyRegistrationField -from mindee.parsing.standard.date import DateField -from mindee.parsing.standard.locale import LocaleField -from mindee.parsing.standard.text import StringField - - -class ProofOfAddressV1Document(Prediction): - """Proof of Address API version 1.1 document data.""" - - date: DateField - """The date the document was issued.""" - dates: List[DateField] - """List of dates found on the document.""" - issuer_address: StringField - """The address of the document's issuer.""" - issuer_company_registration: List[CompanyRegistrationField] - """List of company registrations found for the issuer.""" - issuer_name: StringField - """The name of the person or company issuing the document.""" - locale: LocaleField - """The locale detected on the document.""" - recipient_address: StringField - """The address of the recipient.""" - recipient_company_registration: List[CompanyRegistrationField] - """List of company registrations found for the recipient.""" - recipient_name: StringField - """The name of the person or company receiving the document.""" - - def __init__( - self, - raw_prediction: StringDict, - page_id: Optional[int] = None, - ): - """ - Proof of Address document. - - :param raw_prediction: Raw prediction from HTTP response - :param page_id: Page number for multi pages pdf input - """ - super().__init__(raw_prediction, page_id) - self.date = DateField( - raw_prediction["date"], - page_id=page_id, - ) - self.dates = [ - DateField(prediction, page_id=page_id) - for prediction in raw_prediction["dates"] - ] - self.issuer_address = StringField( - raw_prediction["issuer_address"], - page_id=page_id, - ) - self.issuer_company_registration = [ - CompanyRegistrationField(prediction, page_id=page_id) - for prediction in raw_prediction["issuer_company_registration"] - ] - self.issuer_name = StringField( - raw_prediction["issuer_name"], - page_id=page_id, - ) - self.locale = LocaleField( - raw_prediction["locale"], - page_id=page_id, - ) - self.recipient_address = StringField( - raw_prediction["recipient_address"], - page_id=page_id, - ) - self.recipient_company_registration = [ - CompanyRegistrationField(prediction, page_id=page_id) - for prediction in raw_prediction["recipient_company_registration"] - ] - self.recipient_name = StringField( - raw_prediction["recipient_name"], - page_id=page_id, - ) - - def __str__(self) -> str: - dates = f"\n { ' ' * 7 }".join( - [str(item) for item in self.dates], - ) - issuer_company_registration = f"\n { ' ' * 30 }".join( - [str(item) for item in self.issuer_company_registration], - ) - recipient_company_registration = f"\n { ' ' * 33 }".join( - [str(item) for item in self.recipient_company_registration], - ) - out_str: str = f":Locale: {self.locale}\n" - out_str += f":Issuer Name: {self.issuer_name}\n" - out_str += f":Issuer Company Registrations: {issuer_company_registration}\n" - out_str += f":Issuer Address: {self.issuer_address}\n" - out_str += f":Recipient Name: {self.recipient_name}\n" - out_str += ( - f":Recipient Company Registrations: {recipient_company_registration}\n" - ) - out_str += f":Recipient Address: {self.recipient_address}\n" - out_str += f":Dates: {dates}\n" - out_str += f":Date of Issue: {self.date}\n" - return clean_out_string(out_str) diff --git a/mindee/product/us/__init__.py b/mindee/product/us/__init__.py index 09725bbc..d1c0da1a 100644 --- a/mindee/product/us/__init__.py +++ b/mindee/product/us/__init__.py @@ -32,10 +32,3 @@ from mindee.product.us.us_mail.us_mail_v3_sender_address import ( UsMailV3SenderAddress, ) -from mindee.product.us.w9.w9_v1 import W9V1 -from mindee.product.us.w9.w9_v1_document import ( - W9V1Document, -) -from mindee.product.us.w9.w9_v1_page import ( - W9V1Page, -) diff --git a/mindee/product/us/w9/__init__.py b/mindee/product/us/w9/__init__.py deleted file mode 100644 index 1d6d24fb..00000000 --- a/mindee/product/us/w9/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from mindee.product.us.w9.w9_v1 import W9V1 -from mindee.product.us.w9.w9_v1_document import ( - W9V1Document, -) -from mindee.product.us.w9.w9_v1_page import ( - W9V1Page, -) diff --git a/mindee/product/us/w9/w9_v1.py b/mindee/product/us/w9/w9_v1.py deleted file mode 100644 index ef51345c..00000000 --- a/mindee/product/us/w9/w9_v1.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import List - -from mindee.parsing.common.inference import Inference -from mindee.parsing.common.page import Page -from mindee.parsing.common.string_dict import StringDict -from mindee.product.us.w9.w9_v1_document import ( - W9V1Document, -) -from mindee.product.us.w9.w9_v1_page import ( - W9V1Page, -) - - -class W9V1(Inference): - """W9 API version 1 inference prediction.""" - - prediction: W9V1Document - """Document-level prediction.""" - pages: List[Page[W9V1Page]] - """Page-level prediction(s).""" - endpoint_name = "us_w9" - """Name of the endpoint.""" - endpoint_version = "1" - """Version of the endpoint.""" - - def __init__(self, raw_prediction: StringDict): - """ - W9 v1 inference. - - :param raw_prediction: Raw prediction from the HTTP response. - """ - super().__init__(raw_prediction) - - self.prediction = W9V1Document(raw_prediction["prediction"]) - self.pages = [] - for page in raw_prediction["pages"]: - try: - page_prediction = page["prediction"] - except KeyError: - continue - if page_prediction: - self.pages.append(Page(W9V1Page, page)) diff --git a/mindee/product/us/w9/w9_v1_document.py b/mindee/product/us/w9/w9_v1_document.py deleted file mode 100644 index 56c22e8b..00000000 --- a/mindee/product/us/w9/w9_v1_document.py +++ /dev/null @@ -1,8 +0,0 @@ -from mindee.parsing.common.prediction import Prediction - - -class W9V1Document(Prediction): - """W9 API version 1.0 document data.""" - - def __str__(self) -> str: - return "" diff --git a/mindee/product/us/w9/w9_v1_page.py b/mindee/product/us/w9/w9_v1_page.py deleted file mode 100644 index 1c5cf585..00000000 --- a/mindee/product/us/w9/w9_v1_page.py +++ /dev/null @@ -1,115 +0,0 @@ -from typing import Optional - -from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.common.summary_helper import clean_out_string -from mindee.parsing.standard.position import PositionField -from mindee.parsing.standard.text import StringField -from mindee.product.us.w9.w9_v1_document import ( - W9V1Document, -) - - -class W9V1Page(W9V1Document): - """W9 API version 1.0 page data.""" - - address: StringField - """The street address (number, street, and apt. or suite no.) of the applicant.""" - business_name: StringField - """The business name or disregarded entity name, if different from Name.""" - city_state_zip: StringField - """The city, state, and ZIP code of the applicant.""" - ein: StringField - """The employer identification number.""" - name: StringField - """Name as shown on the applicant's income tax return.""" - signature_date_position: PositionField - """Position of the signature date on the document.""" - signature_position: PositionField - """Position of the signature on the document.""" - ssn: StringField - """The applicant's social security number.""" - tax_classification: StringField - """The federal tax classification, which can vary depending on the revision date.""" - tax_classification_llc: StringField - """Depending on revision year, among S, C, P or D for Limited Liability Company Classification.""" - tax_classification_other_details: StringField - """Tax Classification Other Details.""" - w9_revision_date: StringField - """The Revision month and year of the W9 form.""" - - def __init__( - self, - raw_prediction: StringDict, - page_id: Optional[int] = None, - ): - """ - W9 page. - - :param raw_prediction: Raw prediction from HTTP response - :param page_id: Page number for multi pages pdf input - """ - super().__init__(raw_prediction=raw_prediction, page_id=page_id) - self.address = StringField( - raw_prediction["address"], - page_id=page_id, - ) - self.business_name = StringField( - raw_prediction["business_name"], - page_id=page_id, - ) - self.city_state_zip = StringField( - raw_prediction["city_state_zip"], - page_id=page_id, - ) - self.ein = StringField( - raw_prediction["ein"], - page_id=page_id, - ) - self.name = StringField( - raw_prediction["name"], - page_id=page_id, - ) - self.signature_date_position = PositionField( - raw_prediction["signature_date_position"], - page_id=page_id, - ) - self.signature_position = PositionField( - raw_prediction["signature_position"], - page_id=page_id, - ) - self.ssn = StringField( - raw_prediction["ssn"], - page_id=page_id, - ) - self.tax_classification = StringField( - raw_prediction["tax_classification"], - page_id=page_id, - ) - self.tax_classification_llc = StringField( - raw_prediction["tax_classification_llc"], - page_id=page_id, - ) - self.tax_classification_other_details = StringField( - raw_prediction["tax_classification_other_details"], - page_id=page_id, - ) - self.w9_revision_date = StringField( - raw_prediction["w9_revision_date"], - page_id=page_id, - ) - - def __str__(self) -> str: - out_str: str = f":Name: {self.name}\n" - out_str += f":SSN: {self.ssn}\n" - out_str += f":Address: {self.address}\n" - out_str += f":City State Zip: {self.city_state_zip}\n" - out_str += f":Business Name: {self.business_name}\n" - out_str += f":EIN: {self.ein}\n" - out_str += f":Tax Classification: {self.tax_classification}\n" - out_str += f":Tax Classification Other Details: {self.tax_classification_other_details}\n" - out_str += f":W9 Revision Date: {self.w9_revision_date}\n" - out_str += f":Signature Position: {self.signature_position}\n" - out_str += f":Signature Date Position: {self.signature_date_position}\n" - out_str += f":Tax Classification LLC: {self.tax_classification_llc}\n" - out_str += f"{super().__str__()}" - return clean_out_string(out_str) diff --git a/tests/product/proof_of_address/__init__.py b/tests/product/proof_of_address/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/product/proof_of_address/test_proof_of_address_v1.py b/tests/product/proof_of_address/test_proof_of_address_v1.py deleted file mode 100644 index 7360b9d6..00000000 --- a/tests/product/proof_of_address/test_proof_of_address_v1.py +++ /dev/null @@ -1,54 +0,0 @@ -import json - -import pytest - -from mindee.parsing.common.document import Document -from mindee.parsing.common.page import Page -from mindee.product.proof_of_address.proof_of_address_v1 import ProofOfAddressV1 -from mindee.product.proof_of_address.proof_of_address_v1_document import ( - ProofOfAddressV1Document, -) -from tests.product import PRODUCT_DATA_DIR - -RESPONSE_DIR = PRODUCT_DATA_DIR / "proof_of_address" / "response_v1" - -ProofOfAddressV1DocumentType = Document[ - ProofOfAddressV1Document, - Page[ProofOfAddressV1Document], -] - - -@pytest.fixture -def complete_doc() -> ProofOfAddressV1DocumentType: - file_path = RESPONSE_DIR / "complete.json" - with open(file_path, "r", encoding="utf-8") as open_file: - json_data = json.load(open_file) - return Document(ProofOfAddressV1, json_data["document"]) - - -@pytest.fixture -def empty_doc() -> ProofOfAddressV1DocumentType: - file_path = RESPONSE_DIR / "empty.json" - with open(file_path, "r", encoding="utf-8") as open_file: - json_data = json.load(open_file) - return Document(ProofOfAddressV1, json_data["document"]) - - -def test_complete_doc(complete_doc: ProofOfAddressV1DocumentType): - file_path = RESPONSE_DIR / "summary_full.rst" - with open(file_path, "r", encoding="utf-8") as open_file: - reference_str = open_file.read() - assert str(complete_doc) == reference_str - - -def test_empty_doc(empty_doc: ProofOfAddressV1DocumentType): - prediction = empty_doc.inference.prediction - assert prediction.locale.value is None - assert prediction.issuer_name.value is None - assert len(prediction.issuer_company_registration) == 0 - assert prediction.issuer_address.value is None - assert prediction.recipient_name.value is None - assert len(prediction.recipient_company_registration) == 0 - assert prediction.recipient_address.value is None - assert len(prediction.dates) == 0 - assert prediction.date.value is None diff --git a/tests/product/us/w9/__init__.py b/tests/product/us/w9/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/product/us/w9/test_w9_v1.py b/tests/product/us/w9/test_w9_v1.py deleted file mode 100644 index 64d98b13..00000000 --- a/tests/product/us/w9/test_w9_v1.py +++ /dev/null @@ -1,77 +0,0 @@ -import json - -import pytest - -from mindee.parsing.common.document import Document -from mindee.parsing.common.page import Page -from mindee.product.us.w9.w9_v1 import W9V1 -from mindee.product.us.w9.w9_v1_document import ( - W9V1Document, -) -from mindee.product.us.w9.w9_v1_page import ( - W9V1Page, -) -from tests.product import PRODUCT_DATA_DIR - -RESPONSE_DIR = PRODUCT_DATA_DIR / "us_w9" / "response_v1" - -W9V1DocumentType = Document[ - W9V1Document, - Page[W9V1Page], -] - - -@pytest.fixture -def complete_doc() -> W9V1DocumentType: - file_path = RESPONSE_DIR / "complete.json" - with open(file_path, "r", encoding="utf-8") as open_file: - json_data = json.load(open_file) - return Document(W9V1, json_data["document"]) - - -@pytest.fixture -def empty_doc() -> W9V1DocumentType: - file_path = RESPONSE_DIR / "empty.json" - with open(file_path, "r", encoding="utf-8") as open_file: - json_data = json.load(open_file) - return Document(W9V1, json_data["document"]) - - -@pytest.fixture -def complete_page0() -> Page[W9V1Page]: - file_path = RESPONSE_DIR / "complete.json" - with open(file_path, "r", encoding="utf-8") as open_file: - json_data = json.load(open_file) - page0 = json_data["document"]["inference"]["pages"][0] - return Page(W9V1Page, page0) - - -def test_complete_doc(complete_doc: W9V1DocumentType): - file_path = RESPONSE_DIR / "summary_full.rst" - with open(file_path, "r", encoding="utf-8") as open_file: - reference_str = open_file.read() - assert str(complete_doc) == reference_str - - -def test_empty_doc(empty_doc: W9V1DocumentType): - prediction = empty_doc.inference.pages[0].prediction - assert prediction.name.value is None - assert prediction.ssn.value is None - assert prediction.address.value is None - assert prediction.city_state_zip.value is None - assert prediction.business_name.value is None - assert prediction.ein.value is None - assert prediction.tax_classification.value is None - assert prediction.tax_classification_other_details.value is None - assert prediction.w9_revision_date.value is None - assert not prediction.signature_position.polygon - assert not prediction.signature_date_position.polygon - assert prediction.tax_classification_llc.value is None - - -def test_complete_page0(complete_page0: Page[W9V1Page]): - file_path = RESPONSE_DIR / "summary_page0.rst" - with open(file_path, "r", encoding="utf-8") as open_file: - reference_str = open_file.read() - assert complete_page0.id == 0 - assert str(complete_page0) == reference_str diff --git a/tests/product/us/w9/test_w9_v1_regression.py b/tests/product/us/w9/test_w9_v1_regression.py deleted file mode 100644 index bbed815c..00000000 --- a/tests/product/us/w9/test_w9_v1_regression.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from mindee.client import Client -from mindee.product.us.w9.w9_v1 import W9V1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version - - -@pytest.mark.regression -def test_default_sample(): - client = Client() - with open( - PRODUCT_DATA_DIR / "us_w9" / "response_v1" / "default_sample.rst", - encoding="utf-8", - ) as rst_file: - rst_ref = rst_file.read() - - sample = client.source_from_path( - PRODUCT_DATA_DIR / "us_w9" / "default_sample.jpg", - ) - response = client.parse(W9V1, sample) - doc_response = response.document - doc_response.id = get_id(rst_ref) - doc_response.inference.product.version = get_version(rst_ref) - assert str(doc_response) == rst_ref