From 634ab537533f1ca5b9bd7d3db78f1c9fc70e4ea2 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 May 2025 12:50:01 +0200 Subject: [PATCH 1/4] add address field --- mindee/parsing/standard/__init__.py | 1 + mindee/parsing/standard/address.py | 62 +++++++++++++++++++++++++++++ mindee/parsing/standard/text.py | 1 + 3 files changed, 64 insertions(+) create mode 100644 mindee/parsing/standard/address.py diff --git a/mindee/parsing/standard/__init__.py b/mindee/parsing/standard/__init__.py index 1c59987c..5d66dd65 100644 --- a/mindee/parsing/standard/__init__.py +++ b/mindee/parsing/standard/__init__.py @@ -1,3 +1,4 @@ +from mindee.parsing.standard.address import AddressField from mindee.parsing.standard.amount import AmountField from mindee.parsing.standard.base import ( BaseField, diff --git a/mindee/parsing/standard/address.py b/mindee/parsing/standard/address.py new file mode 100644 index 00000000..6ffe094e --- /dev/null +++ b/mindee/parsing/standard/address.py @@ -0,0 +1,62 @@ +from typing import Optional + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.standard.text import StringField + + +class AddressField(StringField): + """A field containing an address value.""" + + street_number: Optional[str] + """Street number.""" + street_name: Optional[str] + """Street name.""" + po_box: Optional[str] + """PO Box number.""" + address_complement: Optional[str] + """Address complement.""" + city: Optional[str] + """City name.""" + postal_code: Optional[str] + """Postal code.""" + state: Optional[str] + """State name.""" + country: Optional[str] + """Country name.""" + + def __init__( + self, + raw_prediction: StringDict, + reconstructed: bool = False, + page_id: Optional[int] = None, + ): + """ + Address field object. + + :param raw_prediction: Address prediction object from HTTP response. + :param reconstructed: Bool for reconstructed object (not extracted in the API). 
+ :param page_id: Page number for multi-page document. + """ + self.value = None + super().__init__( + raw_prediction, + value_key="value", + reconstructed=reconstructed, + page_id=page_id, + ) + if raw_prediction.get("street_number"): + self.street_number = raw_prediction["street_number"] + if raw_prediction.get("street_name"): + self.street_name = raw_prediction["street_name"] + if raw_prediction.get("po_box"): + self.po_box = raw_prediction["po_box"] + if raw_prediction.get("address_complement"): + self.address_complement = raw_prediction["address_complement"] + if raw_prediction.get("city"): + self.city = raw_prediction["city"] + if raw_prediction.get("postal_code"): + self.postal_code = raw_prediction["postal_code"] + if raw_prediction.get("state"): + self.city = raw_prediction["state"] + if raw_prediction.get("country"): + self.city = raw_prediction["country"] diff --git a/mindee/parsing/standard/text.py b/mindee/parsing/standard/text.py index 80cba330..f3e3c191 100644 --- a/mindee/parsing/standard/text.py +++ b/mindee/parsing/standard/text.py @@ -8,6 +8,7 @@ class StringField(FieldPositionMixin, BaseField): """A field containing a text value.""" value: Optional[str] + """Value of the string.""" raw_value: Optional[str] """The value as it appears on the document.""" From f14be028b6687855d653d5de2f6ecc745f7135d7 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 May 2025 15:04:12 +0200 Subject: [PATCH 2/4] :sparkles: add support for full address fields + upgrade products --- docs/extras/guide/delivery_notes_v1.md | 4 +- docs/extras/guide/expense_receipts_v5.md | 14 ++++- docs/extras/guide/financial_document_v1.md | 28 ++++++--- docs/extras/guide/ind_passport_v1.md | 4 +- docs/extras/guide/invoices_v4.md | 59 +++++++++++++++++-- docs/extras/guide/us_healthcare_cards_v1.md | 30 ++++++---- docs/extras/guide/us_mail_v3.md | 35 +---------- mindee/parsing/standard/address.py | 3 + 
.../delivery_note_v1_document.py | 9 +-- .../financial_document_v1_document.py | 23 ++++---- mindee/product/invoice/invoice_v4_document.py | 33 ++++++++--- mindee/product/receipt/receipt_v5_document.py | 7 ++- .../healthcare_card_v1_copay.py | 6 +- .../healthcare_card_v1_document.py | 13 +++- tests/api/test_response.py | 2 +- tests/data | 2 +- .../test_healthcare_card_v1.py | 1 + tests/test_client.py | 12 +++- 18 files changed, 183 insertions(+), 102 deletions(-) diff --git a/docs/extras/guide/delivery_notes_v1.md b/docs/extras/guide/delivery_notes_v1.md index 0bf18f20..c01ff694 100644 --- a/docs/extras/guide/delivery_notes_v1.md +++ b/docs/extras/guide/delivery_notes_v1.md @@ -95,7 +95,7 @@ The text field `StringField` only has one constraint: its **value** is an `Optio The following fields are extracted for Delivery note V1: ## Customer Address -**customer_address** ([StringField](#stringfield)): The address of the customer receiving the goods. +**customer_address** ([AddressField](#addressfield)): The address of the customer receiving the goods. ```py print(result.document.inference.prediction.customer_address.value) @@ -123,7 +123,7 @@ print(result.document.inference.prediction.delivery_number.value) ``` ## Supplier Address -**supplier_address** ([StringField](#stringfield)): The address of the supplier providing the goods. +**supplier_address** ([AddressField](#addressfield)): The address of the supplier providing the goods. 
```py print(result.document.inference.prediction.supplier_address.value) diff --git a/docs/extras/guide/expense_receipts_v5.md b/docs/extras/guide/expense_receipts_v5.md index 352d06d2..25e6710a 100644 --- a/docs/extras/guide/expense_receipts_v5.md +++ b/docs/extras/guide/expense_receipts_v5.md @@ -237,6 +237,9 @@ The following fields are extracted for Receipt V5: - 'gasoline' - 'telecom' - 'miscellaneous' + - 'software' + - 'shopping' + - 'energy' ```py print(result.document.inference.prediction.category.value) @@ -291,6 +294,15 @@ print(result.document.inference.prediction.receipt_number.value) - 'train' - 'restaurant' - 'shopping' + - 'other' + - 'groceries' + - 'cultural' + - 'electronics' + - 'office_supplies' + - 'micromobility' + - 'car_rental' + - 'public' + - 'delivery' - None ```py @@ -298,7 +310,7 @@ print(result.document.inference.prediction.subcategory.value) ``` ## Supplier Address -**supplier_address** ([StringField](#stringfield)): The address of the supplier or merchant. +**supplier_address** ([AddressField](#addressfield)): The address of the supplier or merchant. 
```py print(result.document.inference.prediction.supplier_address.value) diff --git a/docs/extras/guide/financial_document_v1.md b/docs/extras/guide/financial_document_v1.md index 1b17667d..735ee853 100644 --- a/docs/extras/guide/financial_document_v1.md +++ b/docs/extras/guide/financial_document_v1.md @@ -70,12 +70,12 @@ print(result.document) ######## Document ######## -:Mindee ID: f52333ab-811e-4647-993e-ad79e072afa3 +:Mindee ID: 6dd26385-719b-4527-bf6f-87d9da619de5 :Filename: default_sample.jpg Inference ######### -:Product: mindee/financial_document v1.12 +:Product: mindee/financial_document v1.14 :Rotation applied: Yes Prediction @@ -276,14 +276,14 @@ A `FinancialDocumentV1LineItem` implements the following attributes: The following fields are extracted for Financial Document V1: ## Billing Address -**billing_address** ([StringField](#stringfield)): The customer's address used for billing. +**billing_address** ([AddressField](#addressfield)): The customer's address used for billing. ```py print(result.document.inference.prediction.billing_address.value) ``` ## Purchase Category -**category** ([ClassificationField](#classificationfield)): The purchase category, only for receipts. +**category** ([ClassificationField](#classificationfield)): The purchase category. #### Possible values include: - 'toll' @@ -294,13 +294,16 @@ print(result.document.inference.prediction.billing_address.value) - 'gasoline' - 'telecom' - 'miscellaneous' + - 'software' + - 'shopping' + - 'energy' ```py print(result.document.inference.prediction.category.value) ``` ## Customer Address -**customer_address** ([StringField](#stringfield)): The address of the customer. +**customer_address** ([AddressField](#addressfield)): The address of the customer. 
```py print(result.document.inference.prediction.customer_address.value) @@ -432,14 +435,14 @@ for reference_numbers_elem in result.document.inference.prediction.reference_num ``` ## Shipping Address -**shipping_address** ([StringField](#stringfield)): The customer's address used for shipping. +**shipping_address** ([AddressField](#addressfield)): The customer's address used for shipping. ```py print(result.document.inference.prediction.shipping_address.value) ``` ## Purchase Subcategory -**subcategory** ([ClassificationField](#classificationfield)): The purchase subcategory for transport and food, only for receipts. +**subcategory** ([ClassificationField](#classificationfield)): The purchase subcategory for transport, food and shopping. #### Possible values include: - 'plane' @@ -447,6 +450,15 @@ print(result.document.inference.prediction.shipping_address.value) - 'train' - 'restaurant' - 'shopping' + - 'other' + - 'groceries' + - 'cultural' + - 'electronics' + - 'office_supplies' + - 'micromobility' + - 'car_rental' + - 'public' + - 'delivery' - None ```py @@ -454,7 +466,7 @@ print(result.document.inference.prediction.subcategory.value) ``` ## Supplier Address -**supplier_address** ([StringField](#stringfield)): The address of the supplier or merchant. +**supplier_address** ([AddressField](#addressfield)): The address of the supplier or merchant. 
```py print(result.document.inference.prediction.supplier_address.value) diff --git a/docs/extras/guide/ind_passport_v1.md b/docs/extras/guide/ind_passport_v1.md index 3e6c613d..7db4503d 100644 --- a/docs/extras/guide/ind_passport_v1.md +++ b/docs/extras/guide/ind_passport_v1.md @@ -45,7 +45,7 @@ Document Inference ######### -:Product: mindee/ind_passport v1.0 +:Product: mindee/ind_passport v1.2 :Rotation applied: Yes Prediction @@ -68,10 +68,10 @@ Prediction :Name of Mother: :Old Passport Date of Issue: :Old Passport Number: +:Old Passport Place of Issue: :Address Line 1: :Address Line 2: :Address Line 3: -:Old Passport Place of Issue: :File Number: ``` diff --git a/docs/extras/guide/invoices_v4.md b/docs/extras/guide/invoices_v4.md index 35011ecb..f53ebdbe 100644 --- a/docs/extras/guide/invoices_v4.md +++ b/docs/extras/guide/invoices_v4.md @@ -70,12 +70,12 @@ print(result.document) ######## Document ######## -:Mindee ID: 3e524d26-f7dc-4852-9bbf-833a127a9570 +:Mindee ID: 744748d5-9051-461c-b70c-bbf81f5ff943 :Filename: default_sample.jpg Inference ######### -:Product: mindee/invoices v4.10 +:Product: mindee/invoices v4.11 :Rotation applied: Yes Prediction @@ -111,6 +111,8 @@ Prediction :Billing Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada :Document Type: INVOICE :Document Type Extended: INVOICE +:Purchase Subcategory: +:Purchase Category: miscellaneous :Line Items: +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price | @@ -158,6 +160,8 @@ Page 0 :Billing Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada :Document Type: INVOICE :Document Type Extended: INVOICE +:Purchase Subcategory: +:Purchase Category: miscellaneous :Line Items: 
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price | @@ -264,14 +268,33 @@ A `InvoiceV4LineItem` implements the following attributes: The following fields are extracted for Invoice V4: ## Billing Address -**billing_address** ([StringField](#stringfield)): The customer billing address. +**billing_address** ([AddressField](#addressfield)): The customer billing address. ```py print(result.document.inference.prediction.billing_address.value) ``` +## Purchase Category +**category** ([ClassificationField](#classificationfield)): The purchase category. + +#### Possible values include: + - 'toll' + - 'food' + - 'parking' + - 'transport' + - 'accommodation' + - 'telecom' + - 'miscellaneous' + - 'software' + - 'shopping' + - 'energy' + +```py +print(result.document.inference.prediction.category.value) +``` + ## Customer Address -**customer_address** ([StringField](#stringfield)): The address of the customer. +**customer_address** ([AddressField](#addressfield)): The address of the customer. ```py print(result.document.inference.prediction.customer_address.value) @@ -387,14 +410,38 @@ for reference_numbers_elem in result.document.inference.prediction.reference_num ``` ## Shipping Address -**shipping_address** ([StringField](#stringfield)): Customer's delivery address. +**shipping_address** ([AddressField](#addressfield)): Customer's delivery address. ```py print(result.document.inference.prediction.shipping_address.value) ``` +## Purchase Subcategory +**subcategory** ([ClassificationField](#classificationfield)): The purchase subcategory for transport, food and shopping. 
+ +#### Possible values include: + - 'plane' + - 'taxi' + - 'train' + - 'restaurant' + - 'shopping' + - 'other' + - 'groceries' + - 'cultural' + - 'electronics' + - 'office_supplies' + - 'micromobility' + - 'car_rental' + - 'public' + - 'delivery' + - None + +```py +print(result.document.inference.prediction.subcategory.value) +``` + ## Supplier Address -**supplier_address** ([StringField](#stringfield)): The address of the supplier or merchant. +**supplier_address** ([AddressField](#addressfield)): The address of the supplier or merchant. ```py print(result.document.inference.prediction.supplier_address.value) diff --git a/docs/extras/guide/us_healthcare_cards_v1.md b/docs/extras/guide/us_healthcare_cards_v1.md index 7afee818..2cac10b2 100644 --- a/docs/extras/guide/us_healthcare_cards_v1.md +++ b/docs/extras/guide/us_healthcare_cards_v1.md @@ -40,17 +40,18 @@ print(result.document) ######## Document ######## -:Mindee ID: 1e71d5f0-dedb-4070-9d94-9207cd9570b5 +:Mindee ID: 5e917fc8-5c13-42b2-967f-954f4eed9959 :Filename: default_sample.jpg Inference ######### -:Product: mindee/us_healthcare_cards v1.2 +:Product: mindee/us_healthcare_cards v1.3 :Rotation applied: Yes Prediction ========== :Company Name: UnitedHealthcare +:Plan Name: Choice Plus :Member Name: SUBSCRIBER SMITH :Member ID: 123456789 :Issuer 80840: @@ -64,14 +65,14 @@ Prediction :RX ID: :RX GRP: UHEALTH :RX PCN: 9999 -:copays: +:Copays: +--------------+----------------------+ | Service Fees | Service Name | +==============+======================+ - | 300.00 | emergency_room | - +--------------+----------------------+ | 20.00 | office_visit | +--------------+----------------------+ + | 300.00 | emergency_room | + +--------------+----------------------+ | 75.00 | urgent_care | +--------------+----------------------+ | 30.00 | specialist | @@ -110,13 +111,13 @@ The text field `StringField` only has one constraint: its **value** is an `Optio ## Specific Fields Fields which are specific to this product; they 
are not used in any other product. -### copays Field -Is a fixed amount for a covered service. +### Copays Field +Copayments for covered services. A `HealthcareCardV1Copay` implements the following attributes: -* **service_fees** (`float`): The price of service. -* **service_name** (`str`): The name of service of the copay. +* **service_fees** (`float`): The price of the service. +* **service_name** (`str`): The name of the service. #### Possible values include: - primary_care @@ -137,8 +138,8 @@ The following fields are extracted for Healthcare Card V1: print(result.document.inference.prediction.company_name.value) ``` -## copays -**copays** (List[[HealthcareCardV1Copay](#copays-field)]): Is a fixed amount for a covered service. +## Copays +**copays** (List[[HealthcareCardV1Copay](#copays-field)]): Copayments for covered services. ```py for copays_elem in result.document.inference.prediction.copays: @@ -195,6 +196,13 @@ print(result.document.inference.prediction.member_name.value) print(result.document.inference.prediction.payer_id.value) ``` +## Plan Name +**plan_name** ([StringField](#stringfield)): The name of the healthcare plan. + +```py +print(result.document.inference.prediction.plan_name.value) +``` + ## RX BIN **rx_bin** ([StringField](#stringfield)): The BIN number for prescription drug coverage. diff --git a/docs/extras/guide/us_mail_v3.md b/docs/extras/guide/us_mail_v3.md index b77b4d6c..e0485a7b 100644 --- a/docs/extras/guide/us_mail_v3.md +++ b/docs/extras/guide/us_mail_v3.md @@ -6,7 +6,7 @@ parentDoc: 609808f773b0b90051d839de --- The Python OCR SDK supports the [US Mail API](https://platform.mindee.com/mindee/us_mail). -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/us_mail/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
+The [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/us_mail/default_sample.jpg) can be used for testing purposes. ![US Mail sample](https://github.com/mindee/client-lib-test-data/blob/main/products/us_mail/default_sample.jpg?raw=true) # Quick-Start @@ -34,39 +34,6 @@ result: AsyncPredictResponse = mindee_client.enqueue_and_parse( print(result.document) ``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: f9c36f59-977d-4ddc-9f2d-31c294c456ac -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/us_mail v3.0 -:Rotation applied: Yes - -Prediction -========== -:Sender Name: company zed -:Sender Address: - :City: Dallas - :Complete Address: 54321 Elm Street, Dallas, Texas 54321 - :Postal Code: 54321 - :State: TX - :Street: 54321 Elm Street -:Recipient Names: Jane Doe -:Recipient Addresses: - +-----------------+-------------------------------------+-------------------+-------------+------------------------+-------+---------------------------+-----------------+ - | City | Complete Address | Is Address Change | Postal Code | Private Mailbox Number | State | Street | Unit | - +=================+=====================================+===================+=============+========================+=======+===========================+=================+ - | Detroit | 1234 Market Street PMB 4321, Det... | False | 12345 | 4321 | MI | 1234 Market Street | | - +-----------------+-------------------------------------+-------------------+-------------+------------------------+-------+---------------------------+-----------------+ -:Return to Sender: False -``` - # Field Types ## Standard Fields These fields are generic and used in several products. 
diff --git a/mindee/parsing/standard/address.py b/mindee/parsing/standard/address.py index 6ffe094e..cdfbcfdb 100644 --- a/mindee/parsing/standard/address.py +++ b/mindee/parsing/standard/address.py @@ -60,3 +60,6 @@ def __init__( self.city = raw_prediction["state"] if raw_prediction.get("country"): self.city = raw_prediction["country"] + + def __str__(self) -> str: + return self.value if self.value else "" diff --git a/mindee/product/delivery_note/delivery_note_v1_document.py b/mindee/product/delivery_note/delivery_note_v1_document.py index c58c61d5..13fb98cb 100644 --- a/mindee/product/delivery_note/delivery_note_v1_document.py +++ b/mindee/product/delivery_note/delivery_note_v1_document.py @@ -3,6 +3,7 @@ from mindee.parsing.common.prediction import Prediction from mindee.parsing.common.string_dict import StringDict from mindee.parsing.common.summary_helper import clean_out_string +from mindee.parsing.standard.address import AddressField from mindee.parsing.standard.amount import AmountField from mindee.parsing.standard.date import DateField from mindee.parsing.standard.text import StringField @@ -11,7 +12,7 @@ class DeliveryNoteV1Document(Prediction): """Delivery note API version 1.2 document data.""" - customer_address: StringField + customer_address: AddressField """The address of the customer receiving the goods.""" customer_name: StringField """The name of the customer receiving the goods.""" @@ -19,7 +20,7 @@ class DeliveryNoteV1Document(Prediction): """The date on which the delivery is scheduled to arrive.""" delivery_number: StringField """A unique identifier for the delivery note.""" - supplier_address: StringField + supplier_address: AddressField """The address of the supplier providing the goods.""" supplier_name: StringField """The name of the supplier providing the goods.""" @@ -38,7 +39,7 @@ def __init__( :param page_id: Page number for multi pages pdf input """ super().__init__(raw_prediction, page_id) - self.customer_address = StringField( + 
self.customer_address = AddressField( raw_prediction["customer_address"], page_id=page_id, ) @@ -54,7 +55,7 @@ def __init__( raw_prediction["delivery_number"], page_id=page_id, ) - self.supplier_address = StringField( + self.supplier_address = AddressField( raw_prediction["supplier_address"], page_id=page_id, ) diff --git a/mindee/product/financial_document/financial_document_v1_document.py b/mindee/product/financial_document/financial_document_v1_document.py index 7a9f9a35..27a68824 100644 --- a/mindee/product/financial_document/financial_document_v1_document.py +++ b/mindee/product/financial_document/financial_document_v1_document.py @@ -3,6 +3,7 @@ from mindee.parsing.common.prediction import Prediction from mindee.parsing.common.string_dict import StringDict from mindee.parsing.common.summary_helper import clean_out_string +from mindee.parsing.standard.address import AddressField from mindee.parsing.standard.amount import AmountField from mindee.parsing.standard.classification import ClassificationField from mindee.parsing.standard.company_registration import CompanyRegistrationField @@ -17,13 +18,13 @@ class FinancialDocumentV1Document(Prediction): - """Financial Document API version 1.12 document data.""" + """Financial Document API version 1.14 document data.""" - billing_address: StringField + billing_address: AddressField """The customer's address used for billing.""" category: ClassificationField - """The purchase category, only for receipts.""" - customer_address: StringField + """The purchase category.""" + customer_address: AddressField """The address of the customer.""" customer_company_registrations: List[CompanyRegistrationField] """List of company registration numbers associated to the customer.""" @@ -58,11 +59,11 @@ class FinancialDocumentV1Document(Prediction): """The receipt number or identifier only if document is a receipt.""" reference_numbers: List[StringField] """List of Reference numbers, including PO number, only if the document is an 
invoice.""" - shipping_address: StringField + shipping_address: AddressField """The customer's address used for shipping.""" subcategory: ClassificationField - """The purchase subcategory for transport and food, only for receipts.""" - supplier_address: StringField + """The purchase subcategory for transport, food and shopping.""" + supplier_address: AddressField """The address of the supplier or merchant.""" supplier_company_registrations: List[CompanyRegistrationField] """List of company registration numbers associated to the supplier.""" @@ -101,7 +102,7 @@ def __init__( :param page_id: Page number for multi pages pdf input """ super().__init__(raw_prediction, page_id) - self.billing_address = StringField( + self.billing_address = AddressField( raw_prediction["billing_address"], page_id=page_id, ) @@ -109,7 +110,7 @@ def __init__( raw_prediction["category"], page_id=page_id, ) - self.customer_address = StringField( + self.customer_address = AddressField( raw_prediction["customer_address"], page_id=page_id, ) @@ -173,7 +174,7 @@ def __init__( StringField(prediction, page_id=page_id) for prediction in raw_prediction["reference_numbers"] ] - self.shipping_address = StringField( + self.shipping_address = AddressField( raw_prediction["shipping_address"], page_id=page_id, ) @@ -181,7 +182,7 @@ def __init__( raw_prediction["subcategory"], page_id=page_id, ) - self.supplier_address = StringField( + self.supplier_address = AddressField( raw_prediction["supplier_address"], page_id=page_id, ) diff --git a/mindee/product/invoice/invoice_v4_document.py b/mindee/product/invoice/invoice_v4_document.py index fc5773bc..321d80d1 100644 --- a/mindee/product/invoice/invoice_v4_document.py +++ b/mindee/product/invoice/invoice_v4_document.py @@ -3,6 +3,7 @@ from mindee.parsing.common.prediction import Prediction from mindee.parsing.common.string_dict import StringDict from mindee.parsing.common.summary_helper import clean_out_string +from mindee.parsing.standard.address import 
AddressField from mindee.parsing.standard.amount import AmountField from mindee.parsing.standard.classification import ClassificationField from mindee.parsing.standard.company_registration import CompanyRegistrationField @@ -15,11 +16,13 @@ class InvoiceV4Document(Prediction): - """Invoice API version 4.10 document data.""" + """Invoice API version 4.11 document data.""" - billing_address: StringField + billing_address: AddressField """The customer billing address.""" - customer_address: StringField + category: ClassificationField + """The purchase category.""" + customer_address: AddressField """The address of the customer.""" customer_company_registrations: List[CompanyRegistrationField] """List of company registration numbers associated to the customer.""" @@ -47,9 +50,11 @@ class InvoiceV4Document(Prediction): """The purchase order number.""" reference_numbers: List[StringField] """List of all reference numbers on the invoice, including the purchase order number.""" - shipping_address: StringField + shipping_address: AddressField """Customer's delivery address.""" - supplier_address: StringField + subcategory: ClassificationField + """The purchase subcategory for transport, food and shopping.""" + supplier_address: AddressField """The address of the supplier or merchant.""" supplier_company_registrations: List[CompanyRegistrationField] """List of company registration numbers associated to the supplier.""" @@ -84,11 +89,15 @@ def __init__( :param page_id: Page number for multi pages pdf input """ super().__init__(raw_prediction, page_id) - self.billing_address = StringField( + self.billing_address = AddressField( raw_prediction["billing_address"], page_id=page_id, ) - self.customer_address = StringField( + self.category = ClassificationField( + raw_prediction["category"], + page_id=page_id, + ) + self.customer_address = AddressField( raw_prediction["customer_address"], page_id=page_id, ) @@ -144,11 +153,15 @@ def __init__( StringField(prediction, 
page_id=page_id) for prediction in raw_prediction["reference_numbers"] ] - self.shipping_address = StringField( + self.shipping_address = AddressField( raw_prediction["shipping_address"], page_id=page_id, ) - self.supplier_address = StringField( + self.subcategory = ClassificationField( + raw_prediction["subcategory"], + page_id=page_id, + ) + self.supplier_address = AddressField( raw_prediction["supplier_address"], page_id=page_id, ) @@ -268,5 +281,7 @@ def __str__(self) -> str: out_str += f":Billing Address: {self.billing_address}\n" out_str += f":Document Type: {self.document_type}\n" out_str += f":Document Type Extended: {self.document_type_extended}\n" + out_str += f":Purchase Subcategory: {self.subcategory}\n" + out_str += f":Purchase Category: {self.category}\n" out_str += f":Line Items: {self._line_items_to_str()}\n" return clean_out_string(out_str) diff --git a/mindee/product/receipt/receipt_v5_document.py b/mindee/product/receipt/receipt_v5_document.py index 89fb08ef..67f7dc45 100644 --- a/mindee/product/receipt/receipt_v5_document.py +++ b/mindee/product/receipt/receipt_v5_document.py @@ -3,6 +3,7 @@ from mindee.parsing.common.prediction import Prediction from mindee.parsing.common.string_dict import StringDict from mindee.parsing.common.summary_helper import clean_out_string +from mindee.parsing.standard.address import AddressField from mindee.parsing.standard.amount import AmountField from mindee.parsing.standard.classification import ClassificationField from mindee.parsing.standard.company_registration import CompanyRegistrationField @@ -14,7 +15,7 @@ class ReceiptV5Document(Prediction): - """Receipt API version 5.3 document data.""" + """Receipt API version 5.4 document data.""" category: ClassificationField """The purchase category of the receipt.""" @@ -30,7 +31,7 @@ class ReceiptV5Document(Prediction): """The receipt number or identifier.""" subcategory: ClassificationField """The purchase subcategory of the receipt for transport and food.""" - 
supplier_address: StringField + supplier_address: AddressField """The address of the supplier or merchant.""" supplier_company_registrations: List[CompanyRegistrationField] """List of company registration numbers associated to the supplier.""" @@ -91,7 +92,7 @@ def __init__( raw_prediction["subcategory"], page_id=page_id, ) - self.supplier_address = StringField( + self.supplier_address = AddressField( raw_prediction["supplier_address"], page_id=page_id, ) diff --git a/mindee/product/us/healthcare_card/healthcare_card_v1_copay.py b/mindee/product/us/healthcare_card/healthcare_card_v1_copay.py index 1fdfdaaf..4b3ee788 100644 --- a/mindee/product/us/healthcare_card/healthcare_card_v1_copay.py +++ b/mindee/product/us/healthcare_card/healthcare_card_v1_copay.py @@ -11,12 +11,12 @@ class HealthcareCardV1Copay(FieldPositionMixin, FieldConfidenceMixin): - """Is a fixed amount for a covered service.""" + """Copayments for covered services.""" service_fees: Optional[float] - """The price of service.""" + """The price of the service.""" service_name: Optional[str] - """The name of service of the copay.""" + """The name of the service.""" page_n: int """The document page on which the information was found.""" diff --git a/mindee/product/us/healthcare_card/healthcare_card_v1_document.py b/mindee/product/us/healthcare_card/healthcare_card_v1_document.py index 94487b50..9b824d98 100644 --- a/mindee/product/us/healthcare_card/healthcare_card_v1_document.py +++ b/mindee/product/us/healthcare_card/healthcare_card_v1_document.py @@ -11,12 +11,12 @@ class HealthcareCardV1Document(Prediction): - """Healthcare Card API version 1.2 document data.""" + """Healthcare Card API version 1.3 document data.""" company_name: StringField """The name of the company that provides the healthcare plan.""" copays: List[HealthcareCardV1Copay] - """Is a fixed amount for a covered service.""" + """Copayments for covered services.""" dependents: List[StringField] """The list of dependents covered by the 
healthcare plan.""" enrollment_date: DateField @@ -31,6 +31,8 @@ class HealthcareCardV1Document(Prediction): """The name of the member covered by the healthcare plan.""" payer_id: StringField """The unique identifier for the payer in the healthcare system.""" + plan_name: StringField + """The name of the healthcare plan.""" rx_bin: StringField """The BIN number for prescription drug coverage.""" rx_grp: StringField @@ -88,6 +90,10 @@ def __init__( raw_prediction["payer_id"], page_id=page_id, ) + self.plan_name = StringField( + raw_prediction["plan_name"], + page_id=page_id, + ) self.rx_bin = StringField( raw_prediction["rx_bin"], page_id=page_id, @@ -133,6 +139,7 @@ def __str__(self) -> str: [str(item) for item in self.dependents], ) out_str: str = f":Company Name: {self.company_name}\n" + out_str += f":Plan Name: {self.plan_name}\n" out_str += f":Member Name: {self.member_name}\n" out_str += f":Member ID: {self.member_id}\n" out_str += f":Issuer 80840: {self.issuer_80840}\n" @@ -143,6 +150,6 @@ def __str__(self) -> str: out_str += f":RX ID: {self.rx_id}\n" out_str += f":RX GRP: {self.rx_grp}\n" out_str += f":RX PCN: {self.rx_pcn}\n" - out_str += f":copays: {self._copays_to_str()}\n" + out_str += f":Copays: {self._copays_to_str()}\n" out_str += f":Enrollment Date: {self.enrollment_date}\n" return clean_out_string(out_str) diff --git a/tests/api/test_response.py b/tests/api/test_response.py index 7db98bc2..4e9b0a3e 100644 --- a/tests/api/test_response.py +++ b/tests/api/test_response.py @@ -24,7 +24,7 @@ def test_invoice_receipt_v5(): assert isinstance(parsed_response.document.inference, InvoiceV4) for page in parsed_response.document.inference.pages: assert isinstance(page.prediction, InvoiceV4Document) - assert parsed_response.document.n_pages == 2 + assert parsed_response.document.n_pages == 1 def test_response_receipt_v5(): diff --git a/tests/data b/tests/data index 2b3a6893..e48b26e5 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 
2b3a68935e89d1033ae6d7e499361ae6f55cf2ab +Subproject commit e48b26e5250cbbab9d3ee6c66c0eb85b667a4bb8 diff --git a/tests/product/us/healthcare_card/test_healthcare_card_v1.py b/tests/product/us/healthcare_card/test_healthcare_card_v1.py index 11eacf30..9b179007 100644 --- a/tests/product/us/healthcare_card/test_healthcare_card_v1.py +++ b/tests/product/us/healthcare_card/test_healthcare_card_v1.py @@ -44,6 +44,7 @@ def test_complete_doc(complete_doc: HealthcareCardV1DocumentType): def test_empty_doc(empty_doc: HealthcareCardV1DocumentType): prediction = empty_doc.inference.prediction assert prediction.company_name.value is None + assert prediction.plan_name.value is None assert prediction.member_name.value is None assert prediction.member_id.value is None assert prediction.issuer_80840.value is None diff --git a/tests/test_client.py b/tests/test_client.py index 4d2771e0..1f02efd5 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -7,6 +7,7 @@ from mindee.error.mindee_http_error import MindeeHTTPError from mindee.input.local_response import LocalResponse from mindee.input.sources.local_input_source import LocalInputSource +from mindee.product import MultiReceiptsDetectorV1 from mindee.product.international_id.international_id_v2 import InternationalIdV2 from mindee.product.invoice.invoice_v4 import InvoiceV4 from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 @@ -121,11 +122,16 @@ def test_async_wrong_polling_delay(dummy_client: Client): def test_local_response_from_sync_json(dummy_client: Client): input_file = LocalResponse( - PRODUCT_DATA_DIR / "invoices" / "response_v4" / "complete.json" + PRODUCT_DATA_DIR / "multi_receipts_detector" / "response_v1" / "complete.json" ) - with open(PRODUCT_DATA_DIR / "invoices" / "response_v4" / "summary_full.rst") as f: + with open( + PRODUCT_DATA_DIR + / "multi_receipts_detector" + / "response_v1" + / "summary_full.rst" + ) as f: reference_doc = f.read() - result = 
dummy_client.load_prediction(InvoiceV4, input_file) + result = dummy_client.load_prediction(MultiReceiptsDetectorV1, input_file) assert isinstance(result, PredictResponse) assert str(result.document) == reference_doc From ef680076689d8fdb5073f432489a2fd66b02ec85 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 May 2025 15:11:24 +0200 Subject: [PATCH 3/4] update doc --- docs/extras/guide/delivery_notes_v1.md | 15 +++++++++++++++ docs/extras/guide/expense_receipts_v5.md | 15 +++++++++++++++ docs/extras/guide/financial_document_v1.md | 15 +++++++++++++++ docs/extras/guide/invoices_v4.md | 15 +++++++++++++++ 4 files changed, 60 insertions(+) diff --git a/docs/extras/guide/delivery_notes_v1.md b/docs/extras/guide/delivery_notes_v1.md index c01ff694..d0c7ad21 100644 --- a/docs/extras/guide/delivery_notes_v1.md +++ b/docs/extras/guide/delivery_notes_v1.md @@ -80,6 +80,21 @@ A typical `BaseField` object will have the following attributes: Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. +### AddressField +Aside from the basic `BaseField` attributes, the address field `AddressField` also implements the following: + +* **street_number** (`str`): String representation of the string number. Can be `None`. +* **street_name** (`str`): Name of the street. Can be `None`. +* **po_box** (`str`): String representation of the PO Box number. Can be `None`. +* **address_complement** (`str`): Address complement. Can be `None`. +* **city** (`str`): City name. Can be `None`. +* **postal_code** (`str`): String representation of the postal code. Can be `None`. +* **state** (`str`): State name. Can be `None`. +* **country** (`str`): Country name. Can be `None`. + +Note: The `value` field of an AddressField should be a concatenation of the rest of the values. 
+ + ### AmountField The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. diff --git a/docs/extras/guide/expense_receipts_v5.md b/docs/extras/guide/expense_receipts_v5.md index 25e6710a..bd6366bc 100644 --- a/docs/extras/guide/expense_receipts_v5.md +++ b/docs/extras/guide/expense_receipts_v5.md @@ -165,6 +165,21 @@ A typical `BaseField` object will have the following attributes: Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. +### AddressField +Aside from the basic `BaseField` attributes, the address field `AddressField` also implements the following: + +* **street_number** (`str`): String representation of the string number. Can be `None`. +* **street_name** (`str`): Name of the street. Can be `None`. +* **po_box** (`str`): String representation of the PO Box number. Can be `None`. +* **address_complement** (`str`): Address complement. Can be `None`. +* **city** (`str`): City name. Can be `None`. +* **postal_code** (`str`): String representation of the postal code. Can be `None`. +* **state** (`str`): State name. Can be `None`. +* **country** (`str`): Country name. Can be `None`. + +Note: The `value` field of an AddressField should be a concatenation of the rest of the values. + + ### AmountField The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. diff --git a/docs/extras/guide/financial_document_v1.md b/docs/extras/guide/financial_document_v1.md index 735ee853..2b5d4fe5 100644 --- a/docs/extras/guide/financial_document_v1.md +++ b/docs/extras/guide/financial_document_v1.md @@ -203,6 +203,21 @@ A typical `BaseField` object will have the following attributes: Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. 
+### AddressField +Aside from the basic `BaseField` attributes, the address field `AddressField` also implements the following: + +* **street_number** (`str`): String representation of the street number. Can be `None`. +* **street_name** (`str`): Name of the street. Can be `None`. +* **po_box** (`str`): String representation of the PO Box number. Can be `None`. +* **address_complement** (`str`): Address complement. Can be `None`. +* **city** (`str`): City name. Can be `None`. +* **postal_code** (`str`): String representation of the postal code. Can be `None`. +* **state** (`str`): State name. Can be `None`. +* **country** (`str`): Country name. Can be `None`. + +Note: The `value` field of an AddressField should be a concatenation of the rest of the values. + + ### AmountField The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. diff --git a/docs/extras/guide/invoices_v4.md b/docs/extras/guide/invoices_v4.md index f53ebdbe..1f8de52c 100644 --- a/docs/extras/guide/invoices_v4.md +++ b/docs/extras/guide/invoices_v4.md @@ -195,6 +195,21 @@ A typical `BaseField` object will have the following attributes: Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. +### AddressField +Aside from the basic `BaseField` attributes, the address field `AddressField` also implements the following: + +* **street_number** (`str`): String representation of the street number. Can be `None`. +* **street_name** (`str`): Name of the street. Can be `None`. +* **po_box** (`str`): String representation of the PO Box number. Can be `None`. +* **address_complement** (`str`): Address complement. Can be `None`. +* **city** (`str`): City name. Can be `None`. +* **postal_code** (`str`): String representation of the postal code. Can be `None`. +* **state** (`str`): State name. Can be `None`. +* **country** (`str`): Country name. Can be `None`.
+ +Note: The `value` field of an AddressField should be a concatenation of the rest of the values. + + ### AmountField The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. From 682120dc691211d003a80d798418e57b5b1ded24 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 May 2025 17:29:39 +0200 Subject: [PATCH 4/4] fix addresses in wrong products --- docs/extras/guide/delivery_notes_v1.md | 19 ++----------------- docs/extras/guide/expense_receipts_v5.md | 17 +---------------- .../delivery_note_v1_document.py | 9 ++++----- mindee/product/receipt/receipt_v5_document.py | 5 ++--- 4 files changed, 9 insertions(+), 41 deletions(-) diff --git a/docs/extras/guide/delivery_notes_v1.md b/docs/extras/guide/delivery_notes_v1.md index d0c7ad21..0bf18f20 100644 --- a/docs/extras/guide/delivery_notes_v1.md +++ b/docs/extras/guide/delivery_notes_v1.md @@ -80,21 +80,6 @@ A typical `BaseField` object will have the following attributes: Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. -### AddressField -Aside from the basic `BaseField` attributes, the address field `AddressField` also implements the following: - -* **street_number** (`str`): String representation of the string number. Can be `None`. -* **street_name** (`str`): Name of the street. Can be `None`. -* **po_box** (`str`): String representation of the PO Box number. Can be `None`. -* **address_complement** (`str`): Address complement. Can be `None`. -* **city** (`str`): City name. Can be `None`. -* **postal_code** (`str`): String representation of the postal code. Can be `None`. -* **state** (`str`): State name. Can be `None`. -* **country** (`str`): Country name. Can be `None`. - -Note: The `value` field of an AddressField should be a concatenation of the rest of the values. 
- - ### AmountField The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. @@ -110,7 +95,7 @@ The text field `StringField` only has one constraint: its **value** is an `Optio The following fields are extracted for Delivery note V1: ## Customer Address -**customer_address** ([AddressField](#addressfield)): The address of the customer receiving the goods. +**customer_address** ([StringField](#stringfield)): The address of the customer receiving the goods. ```py print(result.document.inference.prediction.customer_address.value) @@ -138,7 +123,7 @@ print(result.document.inference.prediction.delivery_number.value) ``` ## Supplier Address -**supplier_address** ([AddressField](#addressfield)): The address of the supplier providing the goods. +**supplier_address** ([StringField](#stringfield)): The address of the supplier providing the goods. ```py print(result.document.inference.prediction.supplier_address.value) diff --git a/docs/extras/guide/expense_receipts_v5.md b/docs/extras/guide/expense_receipts_v5.md index bd6366bc..61cb79be 100644 --- a/docs/extras/guide/expense_receipts_v5.md +++ b/docs/extras/guide/expense_receipts_v5.md @@ -165,21 +165,6 @@ A typical `BaseField` object will have the following attributes: Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. -### AddressField -Aside from the basic `BaseField` attributes, the address field `AddressField` also implements the following: - -* **street_number** (`str`): String representation of the string number. Can be `None`. -* **street_name** (`str`): Name of the street. Can be `None`. -* **po_box** (`str`): String representation of the PO Box number. Can be `None`. -* **address_complement** (`str`): Address complement. Can be `None`. -* **city** (`str`): City name. Can be `None`. -* **postal_code** (`str`): String representation of the postal code. Can be `None`. 
-* **state** (`str`): State name. Can be `None`. -* **country** (`str`): Country name. Can be `None`. - -Note: The `value` field of an AddressField should be a concatenation of the rest of the values. - - ### AmountField The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. @@ -325,7 +310,7 @@ print(result.document.inference.prediction.subcategory.value) ``` ## Supplier Address -**supplier_address** ([AddressField](#addressfield)): The address of the supplier or merchant. +**supplier_address** ([StringField](#stringfield)): The address of the supplier or merchant. ```py print(result.document.inference.prediction.supplier_address.value) diff --git a/mindee/product/delivery_note/delivery_note_v1_document.py b/mindee/product/delivery_note/delivery_note_v1_document.py index 13fb98cb..c58c61d5 100644 --- a/mindee/product/delivery_note/delivery_note_v1_document.py +++ b/mindee/product/delivery_note/delivery_note_v1_document.py @@ -3,7 +3,6 @@ from mindee.parsing.common.prediction import Prediction from mindee.parsing.common.string_dict import StringDict from mindee.parsing.common.summary_helper import clean_out_string -from mindee.parsing.standard.address import AddressField from mindee.parsing.standard.amount import AmountField from mindee.parsing.standard.date import DateField from mindee.parsing.standard.text import StringField @@ -12,7 +11,7 @@ class DeliveryNoteV1Document(Prediction): """Delivery note API version 1.2 document data.""" - customer_address: AddressField + customer_address: StringField """The address of the customer receiving the goods.""" customer_name: StringField """The name of the customer receiving the goods.""" @@ -20,7 +19,7 @@ class DeliveryNoteV1Document(Prediction): """The date on which the delivery is scheduled to arrive.""" delivery_number: StringField """A unique identifier for the delivery note.""" - supplier_address: AddressField + supplier_address: StringField """The address of the supplier 
providing the goods.""" supplier_name: StringField """The name of the supplier providing the goods.""" @@ -39,7 +38,7 @@ def __init__( :param page_id: Page number for multi pages pdf input """ super().__init__(raw_prediction, page_id) - self.customer_address = AddressField( + self.customer_address = StringField( raw_prediction["customer_address"], page_id=page_id, ) @@ -55,7 +54,7 @@ def __init__( raw_prediction["delivery_number"], page_id=page_id, ) - self.supplier_address = AddressField( + self.supplier_address = StringField( raw_prediction["supplier_address"], page_id=page_id, ) diff --git a/mindee/product/receipt/receipt_v5_document.py b/mindee/product/receipt/receipt_v5_document.py index 67f7dc45..06a9b11a 100644 --- a/mindee/product/receipt/receipt_v5_document.py +++ b/mindee/product/receipt/receipt_v5_document.py @@ -3,7 +3,6 @@ from mindee.parsing.common.prediction import Prediction from mindee.parsing.common.string_dict import StringDict from mindee.parsing.common.summary_helper import clean_out_string -from mindee.parsing.standard.address import AddressField from mindee.parsing.standard.amount import AmountField from mindee.parsing.standard.classification import ClassificationField from mindee.parsing.standard.company_registration import CompanyRegistrationField @@ -31,7 +30,7 @@ class ReceiptV5Document(Prediction): """The receipt number or identifier.""" subcategory: ClassificationField """The purchase subcategory of the receipt for transport and food.""" - supplier_address: AddressField + supplier_address: StringField """The address of the supplier or merchant.""" supplier_company_registrations: List[CompanyRegistrationField] """List of company registration numbers associated to the supplier.""" @@ -92,7 +91,7 @@ def __init__( raw_prediction["subcategory"], page_id=page_id, ) - self.supplier_address = AddressField( + self.supplier_address = StringField( raw_prediction["supplier_address"], page_id=page_id, )