From 62f332e748eb4354707c09f2018d3801f336f020 Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Mon, 16 Sep 2024 18:05:39 -0700
Subject: [PATCH 01/15] Vb/fix ontology leaks plt 1379 (#1814)

---
 libs/labelbox/tests/conftest.py             | 8 +++++++-
 libs/labelbox/tests/data/export/conftest.py | 3 ++-
 libs/labelbox/tests/integration/conftest.py | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py
index a07d52c4d..41a81e94f 100644
--- a/libs/labelbox/tests/conftest.py
+++ b/libs/labelbox/tests/conftest.py
@@ -2,8 +2,14 @@
 import os
 import re
 import time
 import uuid
 from datetime import datetime
+from labelbox.schema.project import Project
+import requests
+from labelbox.schema.ontology import Ontology
+import pytest
+from types import SimpleNamespace
+from typing import Type
 from enum import Enum
 from random import randint
 from string import ascii_letters
@@ -1055,7 +1061,7 @@ def project_with_one_feature_ontology(project, client: Client):
 
 @pytest.fixture
 def configured_project_with_complex_ontology(
-    client: Client, initial_dataset, rand_gen, image_url, teardown_helpers
+    client, initial_dataset, rand_gen, image_url, teardown_helpers
 ):
     project = client.create_project(
         name=rand_gen(str),
diff --git a/libs/labelbox/tests/data/export/conftest.py b/libs/labelbox/tests/data/export/conftest.py
index b1b81230e..4a59b6966 100644
--- a/libs/labelbox/tests/data/export/conftest.py
+++ b/libs/labelbox/tests/data/export/conftest.py
@@ -1,8 +1,9 @@
 import time
-from labelbox import MediaType, Client
 import uuid
+
 import pytest
+from labelbox import Client, MediaType
 from labelbox.schema.annotation_import import AnnotationImportState, LabelImport
 from labelbox.schema.labeling_frontend import LabelingFrontend
 from labelbox.schema.media_type import MediaType
diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py
index 88670811e..45056dfb4 100644
--- a/libs/labelbox/tests/integration/conftest.py
+++ b/libs/labelbox/tests/integration/conftest.py
@@ -113,7 +113,7 @@ def configured_project(
 
 @pytest.fixture
 def configured_project_with_complex_ontology(
-    client: Client, initial_dataset, rand_gen, image_url, teardown_helpers
+    client, initial_dataset, rand_gen, image_url, teardown_helpers
 ):
     project = client.create_project(
         name=rand_gen(str),

From 1a82278b419057cb346f85d165e47666b39c788a Mon Sep 17 00:00:00 2001
From: Michał Jóźwiak
Date: Mon, 9 Sep 2024 15:24:35 +0200
Subject: [PATCH 02/15] [PTDT-2553] Added integration tests for MMC MAL/GT imports

---
 .../test_generic_data_types.py                | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
index 921e98c9d..1cc5538d9 100644
--- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
+++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
@@ -29,6 +29,78 @@ def validate_iso_format(date_string: str):
     assert parsed_t.second is not None
 
 
+@pytest.mark.parametrize(
+    "media_type, data_type_class",
+    [
+        (MediaType.Audio, GenericDataRowData),
+        (MediaType.Html, GenericDataRowData),
+        (MediaType.Image, GenericDataRowData),
+        (MediaType.Text, GenericDataRowData),
+        (MediaType.Video, GenericDataRowData),
+        (MediaType.Conversational,
GenericDataRowData), + (MediaType.Document, GenericDataRowData), + (MediaType.LLMPromptResponseCreation, GenericDataRowData), + (MediaType.LLMPromptCreation, GenericDataRowData), + (OntologyKind.ResponseCreation, GenericDataRowData), + (OntologyKind.ModelEvaluation, GenericDataRowData), + ], +) +def test_generic_data_row_type_by_data_row_id( + media_type, + data_type_class, + annotations_by_media_type, + hardcoded_datarow_id, +): + annotations_ndjson = annotations_by_media_type[media_type] + annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] + + label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] + + data_label = Label( + data=data_type_class(uid=hardcoded_datarow_id()), + annotations=label.annotations, + ) + + assert data_label.data.uid == label.data.uid + assert label.annotations == data_label.annotations + + +@pytest.mark.parametrize( + "media_type, data_type_class", + [ + (MediaType.Audio, GenericDataRowData), + (MediaType.Html, GenericDataRowData), + (MediaType.Image, GenericDataRowData), + (MediaType.Text, GenericDataRowData), + (MediaType.Video, GenericDataRowData), + (MediaType.Conversational, GenericDataRowData), + (MediaType.Document, GenericDataRowData), + # (MediaType.LLMPromptResponseCreation, GenericDataRowData), + # (MediaType.LLMPromptCreation, GenericDataRowData), + (OntologyKind.ResponseCreation, GenericDataRowData), + (OntologyKind.ModelEvaluation, GenericDataRowData), + ], +) +def test_generic_data_row_type_by_global_key( + media_type, + data_type_class, + annotations_by_media_type, + hardcoded_global_key, +): + annotations_ndjson = annotations_by_media_type[media_type] + annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] + + label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] + + data_label = Label( + data=data_type_class(global_key=hardcoded_global_key()), + annotations=label.annotations, + ) + + assert data_label.data.global_key == label.data.global_key + assert label.annotations == data_label.annotations + + @pytest.mark.parametrize( "configured_project, media_type", [ From ee9e2029d78e1aae01936bc4325afba82dc16a9d Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 18 Sep 2024 09:01:28 -0700 Subject: [PATCH 03/15] Vb/merge 5.0.0 (#1826) Co-authored-by: Gabe <33893811+Gabefire@users.noreply.github.com> --- libs/labelbox/src/labelbox/schema/__init__.py | 19 ++++++++++--------- .../data/serialization/ndjson/test_video.py | 5 ++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index d6b74de68..626608e2b 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,28 +1,29 @@ import labelbox.schema.asset_attachment import labelbox.schema.annotation_import +import labelbox.schema.asset_attachment +import labelbox.schema.batch import labelbox.schema.benchmark +import labelbox.schema.catalog import labelbox.schema.data_row +import labelbox.schema.data_row_metadata import labelbox.schema.dataset +import labelbox.schema.iam_integration +import labelbox.schema.identifiable +import labelbox.schema.identifiables import labelbox.schema.invite import labelbox.schema.label import labelbox.schema.labeling_frontend import labelbox.schema.labeling_service +import labelbox.schema.media_type import labelbox.schema.model import labelbox.schema.model_run import labelbox.schema.ontology +import labelbox.schema.ontology_kind import 
labelbox.schema.organization import labelbox.schema.project +import labelbox.schema.project_overview import labelbox.schema.review import labelbox.schema.role import labelbox.schema.task import labelbox.schema.user import labelbox.schema.webhook -import labelbox.schema.data_row_metadata -import labelbox.schema.batch -import labelbox.schema.iam_integration -import labelbox.schema.media_type -import labelbox.schema.identifiables -import labelbox.schema.identifiable -import labelbox.schema.catalog -import labelbox.schema.ontology_kind -import labelbox.schema.project_overview diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 6c14343a4..c0047412d 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,4 +1,6 @@ import json +from operator import itemgetter + from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, @@ -10,15 +12,12 @@ from labelbox.data.annotation_types.geometry.line import Line from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.geometry.rectangle import Rectangle - from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.video import ( VideoClassificationAnnotation, VideoObjectAnnotation, ) - from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from operator import itemgetter def test_video(): From b728bd1cbedcab74a6e2d5df651d60debca43244 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 18 Sep 2024 09:22:02 -0700 Subject: [PATCH 04/15] Revert "Vb/merge 5.0.0 (#1826)" (#1827) --- libs/labelbox/src/labelbox/__init__.py | 1 + libs/labelbox/src/labelbox/orm/model.py | 1 + libs/labelbox/src/labelbox/schema/__init__.py | 20 +- .../labelbox/schema/bulk_import_request.py | 1004 +++++++++++++++++ libs/labelbox/src/labelbox/schema/enums.py | 25 + libs/labelbox/src/labelbox/schema/project.py | 117 +- .../test_bulk_import_request.py | 258 +++++ .../classification_import_global_key.json | 54 + ...conversation_entity_import_global_key.json | 25 + .../data/assets/ndjson/image_import.json | 779 ++++++++++++- .../ndjson/image_import_global_key.json | 823 ++++++++++++++ .../assets/ndjson/image_import_name_only.json | 810 ++++++++++++- .../ndjson/metric_import_global_key.json | 10 + .../assets/ndjson/pdf_import_global_key.json | 155 +++ .../ndjson/polyline_import_global_key.json | 36 + .../ndjson/text_entity_import_global_key.json | 26 + .../ndjson/video_import_global_key.json | 166 +++ .../serialization/ndjson/test_checklist.py | 26 + .../ndjson/test_classification.py | 108 +- .../serialization/ndjson/test_conversation.py | 71 +- .../serialization/ndjson/test_data_gen.py | 80 +- .../data/serialization/ndjson/test_dicom.py | 26 + .../serialization/ndjson/test_document.py | 294 +---- .../ndjson/test_export_video_objects.py | 32 +- .../serialization/ndjson/test_free_text.py | 26 + .../serialization/ndjson/test_global_key.py | 125 +- .../data/serialization/ndjson/test_image.py | 203 +--- .../data/serialization/ndjson/test_metric.py | 170 +-- .../data/serialization/ndjson/test_mmc.py | 125 +- .../ndjson/test_ndlabel_subclass_matching.py | 19 + .../data/serialization/ndjson/test_nested.py | 236 +--- .../serialization/ndjson/test_polyline.py | 82 +- .../data/serialization/ndjson/test_radio.py | 16 + .../serialization/ndjson/test_rectangle.py | 43 +- 
.../serialization/ndjson/test_relationship.py | 151 +-- .../data/serialization/ndjson/test_text.py | 10 + .../serialization/ndjson/test_text_entity.py | 69 +- .../data/serialization/ndjson/test_video.py | 326 +----- 38 files changed, 4704 insertions(+), 1844 deletions(-) create mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py create mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py create mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json create mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 850aec0be..c5212f194 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -6,6 +6,7 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig +from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( MALPredictionImport, MEAPredictionImport, diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py index 535ab0f7d..a2a0fbd91 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -387,6 +387,7 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] + BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index 626608e2b..616565931 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,29 +1,31 @@ import labelbox.schema.asset_attachment import labelbox.schema.annotation_import import labelbox.schema.asset_attachment -import labelbox.schema.batch +import labelbox.schema.bulk_import_request +import labelbox.schema.annotation_import import labelbox.schema.benchmark -import labelbox.schema.catalog import labelbox.schema.data_row -import labelbox.schema.data_row_metadata import labelbox.schema.dataset -import labelbox.schema.iam_integration -import labelbox.schema.identifiable -import labelbox.schema.identifiables import labelbox.schema.invite import labelbox.schema.label import labelbox.schema.labeling_frontend import labelbox.schema.labeling_service -import labelbox.schema.media_type import labelbox.schema.model import labelbox.schema.model_run import labelbox.schema.ontology -import labelbox.schema.ontology_kind import labelbox.schema.organization import labelbox.schema.project -import 
labelbox.schema.project_overview import labelbox.schema.review import labelbox.schema.role import labelbox.schema.task import labelbox.schema.user import labelbox.schema.webhook +import labelbox.schema.data_row_metadata +import labelbox.schema.batch +import labelbox.schema.iam_integration +import labelbox.schema.media_type +import labelbox.schema.identifiables +import labelbox.schema.identifiable +import labelbox.schema.catalog +import labelbox.schema.ontology_kind +import labelbox.schema.project_overview diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py new file mode 100644 index 000000000..8e11f3261 --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/bulk_import_request.py @@ -0,0 +1,1004 @@ +import json +import time +from uuid import UUID, uuid4 +import functools + +import logging +from pathlib import Path +from google.api_core import retry +from labelbox import parser +import requests +from pydantic import ( + ValidationError, + BaseModel, + Field, + field_validator, + model_validator, + ConfigDict, + StringConstraints, +) +from typing_extensions import Literal, Annotated +from typing import ( + Any, + List, + Optional, + BinaryIO, + Dict, + Iterable, + Tuple, + Union, + Type, + Set, + TYPE_CHECKING, +) + +from labelbox import exceptions as lb_exceptions +from labelbox import utils +from labelbox.orm import query +from labelbox.orm.db_object import DbObject +from labelbox.orm.model import Relationship +from labelbox.schema.enums import BulkImportRequestState +from labelbox.schema.serialization import serialize_labels +from labelbox.orm.model import Field as lb_Field + +if TYPE_CHECKING: + from labelbox import Project + from labelbox.types import Label + +NDJSON_MIME_TYPE = "application/x-ndjson" +logger = logging.getLogger(__name__) + +# TODO: Deprecate this library in place of labelimport and malprediction import library. + + +def _determinants(parent_cls: Any) -> List[str]: + return [ + k + for k, v in parent_cls.model_fields.items() + if v.json_schema_extra and "determinant" in v.json_schema_extra + ] + + +def _make_file_name(project_id: str, name: str) -> str: + return f"{project_id}__{name}.ndjson" + + +# TODO(gszpak): move it to client.py +def _make_request_data( + project_id: str, name: str, content_length: int, file_name: str +) -> dict: + query_str = """mutation createBulkImportRequestFromFilePyApi( + $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) 
{ + createBulkImportRequest(data: { + projectId: $projectId, + name: $name, + filePayload: { + file: $file, + contentLength: $contentLength + } + }) { + %s + } + } + """ % query.results_query_part(BulkImportRequest) + variables = { + "projectId": project_id, + "name": name, + "file": None, + "contentLength": content_length, + } + operations = json.dumps({"variables": variables, "query": query_str}) + + return { + "operations": operations, + "map": (None, json.dumps({file_name: ["variables.file"]})), + } + + +def _send_create_file_command( + client, + request_data: dict, + file_name: str, + file_data: Tuple[str, Union[bytes, BinaryIO], str], +) -> dict: + response = client.execute(data=request_data, files={file_name: file_data}) + + if not response.get("createBulkImportRequest", None): + raise lb_exceptions.LabelboxError( + "Failed to create BulkImportRequest, message: %s" + % response.get("errors", None) + or response.get("error", None) + ) + + return response + + +class BulkImportRequest(DbObject): + """Represents the import job when importing annotations. + + Attributes: + name (str) + state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) + input_file_url (str): URL to your web-hosted NDJSON file + error_file_url (str): NDJSON that contains error messages for failed annotations + status_file_url (str): NDJSON that contains status for each annotation + created_at (datetime): UTC timestamp for date BulkImportRequest was created + + project (Relationship): `ToOne` relationship to Project + created_by (Relationship): `ToOne` relationship to User + """ + + name = lb_Field.String("name") + state = lb_Field.Enum(BulkImportRequestState, "state") + input_file_url = lb_Field.String("input_file_url") + error_file_url = lb_Field.String("error_file_url") + status_file_url = lb_Field.String("status_file_url") + created_at = lb_Field.DateTime("created_at") + + project = Relationship.ToOne("Project") + created_by = Relationship.ToOne("User", False, "created_by") + + @property + def inputs(self) -> List[Dict[str, Any]]: + """ + Inputs for each individual annotation uploaded. + This should match the ndjson annotations that you have uploaded. + + Returns: + Uploaded ndjson. + + * This information will expire after 24 hours. + """ + return self._fetch_remote_ndjson(self.input_file_url) + + @property + def errors(self) -> List[Dict[str, Any]]: + """ + Errors for each individual annotation uploaded. This is a subset of statuses + + Returns: + List of dicts containing error messages. Empty list means there were no errors + See `BulkImportRequest.statuses` for more details. + + * This information will expire after 24 hours. + """ + self.wait_until_done() + return self._fetch_remote_ndjson(self.error_file_url) + + @property + def statuses(self) -> List[Dict[str, Any]]: + """ + Status for each individual annotation uploaded. + + Returns: + A status for each annotation if the upload is done running. + See below table for more details + + .. list-table:: + :widths: 15 150 + :header-rows: 1 + + * - Field + - Description + * - uuid + - Specifies the annotation for the status row. + * - dataRow + - JSON object containing the Labelbox data row ID for the annotation. + * - status + - Indicates SUCCESS or FAILURE. + * - errors + - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. + + * This information will expire after 24 hours. 
+ """ + self.wait_until_done() + return self._fetch_remote_ndjson(self.status_file_url) + + @functools.lru_cache() + def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: + """ + Fetches the remote ndjson file and caches the results. + + Args: + url (str): Can be any url pointing to an ndjson file. + Returns: + ndjson as a list of dicts. + """ + response = requests.get(url) + response.raise_for_status() + return parser.loads(response.text) + + def refresh(self) -> None: + """Synchronizes values of all fields with the database.""" + query_str, params = query.get_single(BulkImportRequest, self.uid) + res = self.client.execute(query_str, params) + res = res[utils.camel_case(BulkImportRequest.type_name())] + self._set_field_values(res) + + def wait_till_done(self, sleep_time_seconds: int = 5) -> None: + self.wait_until_done(sleep_time_seconds) + + def wait_until_done(self, sleep_time_seconds: int = 5) -> None: + """Blocks import job until certain conditions are met. + + Blocks until the BulkImportRequest.state changes either to + `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, + periodically refreshing object's state. + + Args: + sleep_time_seconds (str): a time to block between subsequent API calls + """ + while self.state == BulkImportRequestState.RUNNING: + logger.info(f"Sleeping for {sleep_time_seconds} seconds...") + time.sleep(sleep_time_seconds) + self.__exponential_backoff_refresh() + + @retry.Retry( + predicate=retry.if_exception_type( + lb_exceptions.ApiLimitError, + lb_exceptions.TimeoutError, + lb_exceptions.NetworkError, + ) + ) + def __exponential_backoff_refresh(self) -> None: + self.refresh() + + @classmethod + def from_name( + cls, client, project_id: str, name: str + ) -> "BulkImportRequest": + """Fetches existing BulkImportRequest. + + Args: + client (Client): a Labelbox client + project_id (str): BulkImportRequest's project id + name (str): name of BulkImportRequest + Returns: + BulkImportRequest object + + """ + query_str = """query getBulkImportRequestPyApi( + $projectId: ID!, $name: String!) { + bulkImportRequest(where: { + projectId: $projectId, + name: $name + }) { + %s + } + } + """ % query.results_query_part(cls) + params = {"projectId": project_id, "name": name} + response = client.execute(query_str, params=params) + return cls(client, response["bulkImportRequest"]) + + @classmethod + def create_from_url( + cls, client, project_id: str, name: str, url: str, validate=True + ) -> "BulkImportRequest": + """ + Creates a BulkImportRequest from a publicly accessible URL + to an ndjson file with predictions. + + Args: + client (Client): a Labelbox client + project_id (str): id of project for which predictions will be imported + name (str): name of BulkImportRequest + url (str): publicly accessible URL pointing to ndjson file containing predictions + validate (bool): a flag indicating if there should be a validation + if `url` is valid ndjson + Returns: + BulkImportRequest object + """ + if validate: + logger.warn( + "Validation is turned on. The file will be downloaded locally and processed before uploading." + ) + res = requests.get(url) + data = parser.loads(res.text) + _validate_ndjson(data, client.get_project(project_id)) + + query_str = """mutation createBulkImportRequestPyApi( + $projectId: ID!, $name: String!, $fileUrl: String!) 
{ + createBulkImportRequest(data: { + projectId: $projectId, + name: $name, + fileUrl: $fileUrl + }) { + %s + } + } + """ % query.results_query_part(cls) + params = {"projectId": project_id, "name": name, "fileUrl": url} + bulk_import_request_response = client.execute(query_str, params=params) + return cls( + client, bulk_import_request_response["createBulkImportRequest"] + ) + + @classmethod + def create_from_objects( + cls, + client, + project_id: str, + name: str, + predictions: Union[Iterable[Dict], Iterable["Label"]], + validate=True, + ) -> "BulkImportRequest": + """ + Creates a `BulkImportRequest` from an iterable of dictionaries. + + Conforms to JSON predictions format, e.g.: + ``{ + "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", + "schemaId": "ckappz7d700gn0zbocmqkwd9i", + "dataRow": { + "id": "ck1s02fqxm8fi0757f0e6qtdc" + }, + "bbox": { + "top": 48, + "left": 58, + "height": 865, + "width": 1512 + } + }`` + + Args: + client (Client): a Labelbox client + project_id (str): id of project for which predictions will be imported + name (str): name of BulkImportRequest + predictions (Iterable[dict]): iterable of dictionaries representing predictions + validate (bool): a flag indicating if there should be a validation + if `predictions` is valid ndjson + Returns: + BulkImportRequest object + """ + if not isinstance(predictions, list): + raise TypeError( + f"annotations must be in a form of Iterable. Found {type(predictions)}" + ) + ndjson_predictions = serialize_labels(predictions) + + if validate: + _validate_ndjson(ndjson_predictions, client.get_project(project_id)) + + data_str = parser.dumps(ndjson_predictions) + if not data_str: + raise ValueError("annotations cannot be empty") + + data = data_str.encode("utf-8") + file_name = _make_file_name(project_id, name) + request_data = _make_request_data( + project_id, name, len(data_str), file_name + ) + file_data = (file_name, data, NDJSON_MIME_TYPE) + response_data = _send_create_file_command( + client, + request_data=request_data, + file_name=file_name, + file_data=file_data, + ) + + return cls(client, response_data["createBulkImportRequest"]) + + @classmethod + def create_from_local_file( + cls, client, project_id: str, name: str, file: Path, validate_file=True + ) -> "BulkImportRequest": + """ + Creates a BulkImportRequest from a local ndjson file with predictions. 
+
+        Args:
+            client (Client): a Labelbox client
+            project_id (str): id of project for which predictions will be imported
+            name (str): name of BulkImportRequest
+            file (Path): local ndjson file with predictions
+            validate_file (bool): a flag indicating if there should be a validation
+                if `file` is a valid ndjson file
+        Returns:
+            BulkImportRequest object
+
+        """
+        file_name = _make_file_name(project_id, name)
+        content_length = file.stat().st_size
+        request_data = _make_request_data(
+            project_id, name, content_length, file_name
+        )
+
+        with file.open("rb") as f:
+            if validate_file:
+                reader = parser.reader(f)
+                # ensure that the underlying json load call is valid
+                # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
+                # by iterating through the file so we only store
+                # each line in memory rather than the entire file
+                try:
+                    _validate_ndjson(reader, client.get_project(project_id))
+                except ValueError:
+                    raise ValueError(f"{file} is not a valid ndjson file")
+            else:
+                f.seek(0)
+            file_data = (file.name, f, NDJSON_MIME_TYPE)
+            response_data = _send_create_file_command(
+                client, request_data, file_name, file_data
+            )
+        return cls(client, response_data["createBulkImportRequest"])
+
+    def delete(self) -> None:
+        """Deletes the import job and also any annotations created by this import.
+
+        Returns:
+            None
+        """
+        id_param = "bulk_request_id"
+        query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) {
+            deleteBulkImportRequest(where: {id: $%s}) {
+                id
+                name
+            }
+        }""" % (id_param, id_param)
+        self.client.execute(query_str, {id_param: self.uid})
+
+
+def _validate_ndjson(
+    lines: Iterable[Dict[str, Any]], project: "Project"
+) -> None:
+    """
+    Client-side validation of an ndjson object.
+
+    Does not guarantee that an upload will succeed, for the following reasons:
+    * We are not checking the data row types, which will cause the following errors to slip through
+        * Missing frame indices will not cause an error for videos
+    * Uploaded annotations for the wrong data type will pass (e.g. entity on images)
+    * We are not checking bounds of an asset (e.g. frame index, image height, text location)
+
+    Args:
+        lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines
+        project (Project): project for which predictions will be imported
+
+    Raises:
+        MALValidationError: Raised for invalid NDJson
+        UuidError: Duplicate UUID in upload
+    """
+    feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas(
+        project.ontology()
+    )
+    uids: Set[str] = set()
+    for idx, line in enumerate(lines):
+        try:
+            annotation = NDAnnotation(**line)
+            annotation.validate_instance(
+                feature_schemas_by_id, feature_schemas_by_name
+            )
+            uuid = str(annotation.uuid)
+            if uuid in uids:
+                raise lb_exceptions.UuidError(
+                    f"{uuid} already used in this import job, "
+                    "must be unique for the project."
+                )
+            uids.add(uuid)
+        except (ValidationError, ValueError, TypeError, KeyError) as e:
+            raise lb_exceptions.MALValidationError(
+                f"Invalid NDJson on line {idx}"
+            ) from e
+
+
+# The rest of this file contains objects for MAL validation
+def parse_classification(tool):
+    """
+    Parses a classification from an ontology.
+    Only radio, checklist, and text are supported for MAL
+
+    Args:
+        tool (dict)
+
+    Returns:
+        dict
+    """
+    if tool["type"] in ["radio", "checklist"]:
+        option_schema_ids = [r["featureSchemaId"] for r in tool["options"]]
+        option_names = [r["value"] for r in tool["options"]]
+        return {
+            "tool": tool["type"],
+            "featureSchemaId": tool["featureSchemaId"],
+            "name": tool["name"],
+            "options": [*option_schema_ids, *option_names],
+        }
+    elif tool["type"] == "text":
+        return {
+            "tool": tool["type"],
+            "name": tool["name"],
+            "featureSchemaId": tool["featureSchemaId"],
+        }
+
+
+def get_mal_schemas(ontology):
+    """
+    Converts a project ontology to a dict for easier lookup during ndjson validation
+
+    Args:
+        ontology (Ontology)
+    Returns:
+        Dict, Dict : Useful for looking up a tool from a given feature schema id or name
+    """
+
+    valid_feature_schemas_by_schema_id = {}
+    valid_feature_schemas_by_name = {}
+    for tool in ontology.normalized["tools"]:
+        classifications = [
+            parse_classification(classification_tool)
+            for classification_tool in tool["classifications"]
+        ]
+        classifications_by_schema_id = {
+            v["featureSchemaId"]: v for v in classifications
+        }
+        classifications_by_name = {v["name"]: v for v in classifications}
+        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = {
+            "tool": tool["tool"],
+            "classificationsBySchemaId": classifications_by_schema_id,
+            "classificationsByName": classifications_by_name,
+            "name": tool["name"],
+        }
+        valid_feature_schemas_by_name[tool["name"]] = {
+            "tool": tool["tool"],
+            "classificationsBySchemaId": classifications_by_schema_id,
+            "classificationsByName": classifications_by_name,
+            "name": tool["name"],
+        }
+    for tool in ontology.normalized["classifications"]:
+        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = (
+            parse_classification(tool)
+        )
+        valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool)
+    return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name
+
+
+class Bbox(BaseModel):
+    top: float
+    left: float
+    height: float
+    width: float
+
+
+class Point(BaseModel):
+    x: float
+    y: float
+
+
+class FrameLocation(BaseModel):
+    end: int
+    start: int
+
+
+class VideoSupported(BaseModel):
+    # Note that frames are only allowed as top level inferences for video
+    frames: Optional[List[FrameLocation]] = None
+
+
+# Base class for a special kind of union.
+class SpecialUnion:
+    def __new__(cls, **kwargs):
+        return cls.build(kwargs)
+
+    @classmethod
+    def __get_validators__(cls):
+        yield cls.build
+
+    @classmethod
+    def get_union_types(cls):
+        if not issubclass(cls, SpecialUnion):
+            raise TypeError(f"{cls} must be a subclass of SpecialUnion")
+
+        union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")]
+        if len(union_types) < 1:
+            raise TypeError(
+                f"Class {cls} should inherit from a union of objects to build"
+            )
+        if len(union_types) > 1:
+            raise TypeError(
+                f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}"
+            )
+        return union_types[0].__args__[0].__args__
+
+    @classmethod
+    def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase":
+        """
+        Checks through all objects in the union to see which matches the input data.
+
+        Args:
+            data (Union[dict, BaseModel]) : The data for constructing one of the objects in the union
+        Raises:
+            KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion
+            ValidationError: Error while trying to construct a specific object in the union
+
+        """
+        if isinstance(data, BaseModel):
+            data = data.model_dump()
+
+        top_level_fields = []
+        max_match = 0
+        matched = None
+
+        for type_ in cls.get_union_types():
+            determinate_fields = _determinants(type_)
+            top_level_fields.append(determinate_fields)
+            matches = sum([val in determinate_fields for val in data])
+            if matches == len(determinate_fields) and matches > max_match:
+                max_match = matches
+                matched = type_
+
+        if matched is not None:
+            # These two have the exact same top level keys
+            if matched in [NDRadio, NDText]:
+                if isinstance(data["answer"], dict):
+                    matched = NDRadio
+                elif isinstance(data["answer"], str):
+                    matched = NDText
+                else:
+                    raise TypeError(
+                        f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict"
+                    )
+            return matched(**data)
+        else:
+            raise KeyError(
+                f"Invalid annotation. Must have one of the following keys : {top_level_fields}. Found {data}."
+            )
+
+    @classmethod
+    def schema(cls):
+        results = {"definitions": {}}
+        for cl in cls.get_union_types():
+            schema = cl.schema()
+            results["definitions"].update(schema.pop("definitions"))
+            results[cl.__name__] = schema
+        return results
+
+
+class DataRow(BaseModel):
+    id: str
+
+
+class NDFeatureSchema(BaseModel):
+    schemaId: Optional[str] = None
+    name: Optional[str] = None
+
+    @model_validator(mode="after")
+    def must_set_one(self):
+        if self.schemaId is None and self.name is None:
+            raise ValueError(
+                "Must set either schemaId or name for all feature schemas"
+            )
+        return self
+
+
+class NDBase(NDFeatureSchema):
+    ontology_type: str
+    uuid: UUID
+    dataRow: DataRow
+    model_config = ConfigDict(extra="forbid")
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        if self.name:
+            if self.name not in valid_feature_schemas_by_name:
+                raise ValueError(
+                    f"Name {self.name} is not valid for the provided project's ontology."
+                )
+
+            if (
+                self.ontology_type
+                != valid_feature_schemas_by_name[self.name]["tool"]
+            ):
+                raise ValueError(
+                    f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}"
+                )
+
+        if self.schemaId:
+            if self.schemaId not in valid_feature_schemas_by_id:
+                raise ValueError(
+                    f"Schema id {self.schemaId} is not valid for the provided project's ontology."
+                )
+
+            if (
+                self.ontology_type
+                != valid_feature_schemas_by_id[self.schemaId]["tool"]
+            ):
+                raise ValueError(
+                    f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}"
+                )
+
+    def validate_instance(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        self.validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+
+
+###### Classifications ######
+
+
+class NDText(NDBase):
+    ontology_type: Literal["text"] = "text"
+    answer: str = Field(json_schema_extra={"determinant": True})
+    # No feature schema to check
+
+
+class NDChecklist(VideoSupported, NDBase):
+    ontology_type: Literal["checklist"] = "checklist"
+    answers: List[NDFeatureSchema] = Field(
+        json_schema_extra={"determinant": True}
+    )
+
+    @field_validator("answers", mode="before")
+    def validate_answers(cls, value, field):
+        # constr not working with mypy.
+        if not len(value):
+            raise ValueError("Checklist answers should not be empty")
+        return value
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        # Test top level feature schema for this tool
+        super(NDChecklist, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        # Test the feature schemas provided to the answer field
+        if len(
+            set([answer.name or answer.schemaId for answer in self.answers])
+        ) != len(self.answers):
+            raise ValueError(
+                f"Duplicated featureSchema found for checklist {self.uuid}"
+            )
+        for answer in self.answers:
+            options = (
+                valid_feature_schemas_by_name[self.name]["options"]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId]["options"]
+            )
+            if answer.name not in options and answer.schemaId not in options:
+                raise ValueError(
+                    f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {answer}"
+                )
+
+
+class NDRadio(VideoSupported, NDBase):
+    ontology_type: Literal["radio"] = "radio"
+    answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True})
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        super(NDRadio, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        options = (
+            valid_feature_schemas_by_name[self.name]["options"]
+            if self.name
+            else valid_feature_schemas_by_id[self.schemaId]["options"]
+        )
+        if (
+            self.answer.name not in options
+            and self.answer.schemaId not in options
+        ):
+            raise ValueError(
+                f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {self.answer.name or self.answer.schemaId}"
+            )
+
+
+# A union with custom construction logic to improve error messages
+class NDClassification(
+    SpecialUnion,
+    Type[Union[NDText, NDRadio, NDChecklist]],  # type: ignore
+): ...
+
+
+###### Tools ######
+
+
+class NDBaseTool(NDBase):
+    classifications: List[NDClassification] = []
+
+    # This is independent of our problem
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        super(NDBaseTool, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        for classification in self.classifications:
+            classification.validate_feature_schemas(
+                valid_feature_schemas_by_name[self.name][
+                    "classificationsBySchemaId"
+                ]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId][
+                    "classificationsBySchemaId"
+                ],
+                valid_feature_schemas_by_name[self.name][
+                    "classificationsByName"
+                ]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId][
+                    "classificationsByName"
+                ],
+            )
+
+    @field_validator("classifications", mode="before")
+    def validate_subclasses(cls, value, field):
+        # Create uuid and datarow id so we don't have to define classification objects twice
+        # This is caused by the fact that we require these ids for top level classifications but not for subclasses
+        results = []
+        dummy_id = "child".center(25, "_")
+        for row in value:
+            results.append(
+                {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())}
+            )
+        return results
+
+
+class NDPolygon(NDBaseTool):
+    ontology_type: Literal["polygon"] = "polygon"
+    polygon: List[Point] = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("polygon")
+    def is_geom_valid(cls, v):
+        if len(v) < 3:
+            raise ValueError(
+                f"A polygon must have at least 3 points to be valid. Found {v}"
+            )
+        return v
+
+
+class NDPolyline(NDBaseTool):
+    ontology_type: Literal["line"] = "line"
+    line: List[Point] = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("line")
+    def is_geom_valid(cls, v):
+        if len(v) < 2:
+            raise ValueError(
+                f"A line must have at least 2 points to be valid. Found {v}"
+            )
+        return v
+
+
+class NDRectangle(NDBaseTool):
+    ontology_type: Literal["rectangle"] = "rectangle"
+    bbox: Bbox = Field(json_schema_extra={"determinant": True})
+    # Could check if points are positive
+
+
+class NDPoint(NDBaseTool):
+    ontology_type: Literal["point"] = "point"
+    point: Point = Field(json_schema_extra={"determinant": True})
+    # Could check if points are positive
+
+
+class EntityLocation(BaseModel):
+    start: int
+    end: int
+
+
+class NDTextEntity(NDBaseTool):
+    ontology_type: Literal["named-entity"] = "named-entity"
+    location: EntityLocation = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("location")
+    def is_valid_location(cls, v):
+        if isinstance(v, BaseModel):
+            v = v.model_dump()
+
+        if len(v) < 2:
+            raise ValueError(
+                f"A text location must have both a start and an end. Found {v}"
+            )
+        if v["start"] < 0:
+            raise ValueError(f"Text location must be positive. Found {v}")
+        if v["start"] > v["end"]:
+            raise ValueError(
+                f"Text start location must be less than or equal to end. Found {v}"
+            )
+        return v
+
+
+class RLEMaskFeatures(BaseModel):
+    counts: List[int]
+    size: List[int]
+
+    @field_validator("counts")
+    def validate_counts(cls, counts):
+        if not all([count >= 0 for count in counts]):
+            raise ValueError(
+                "Found negative value for counts. They should all be zero or positive"
+            )
+        return counts
+
+    @field_validator("size")
+    def validate_size(cls, size):
+        if len(size) != 2:
+            raise ValueError(
+                f"Mask `size` should have two ints representing height and width. Found : {size}"
+            )
+        if not all([count > 0 for count in size]):
+            raise ValueError(
+                f"Mask `size` should be a positive int. Found : {size}"
+            )
+        return size
+
+
+class PNGMaskFeatures(BaseModel):
+    # base64 encoded png bytes
+    png: str
+
+
+class URIMaskFeatures(BaseModel):
+    instanceURI: str
+    colorRGB: Union[List[int], Tuple[int, int, int]]
+
+    @field_validator("colorRGB")
+    def validate_color(cls, colorRGB):
+        # Does the dtype matter? Can it be a float?
+        if not isinstance(colorRGB, (tuple, list)):
+            raise ValueError(
+                f"Received color that is not a list or tuple. Found : {colorRGB}"
+            )
+        elif len(colorRGB) != 3:
+            raise ValueError(
+                f"Must provide RGB values for segmentation colors. Found : {colorRGB}"
+            )
+        elif not all([0 <= color <= 255 for color in colorRGB]):
+            raise ValueError(
+                f"All rgb colors must be between 0 and 255. Found : {colorRGB}"
+            )
+        return colorRGB
+
+
+class NDMask(NDBaseTool):
+    ontology_type: Literal["superpixel"] = "superpixel"
+    mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field(
+        json_schema_extra={"determinant": True}
+    )
+
+
+# A union with custom construction logic to improve error messages
+class NDTool(
+    SpecialUnion,
+    Type[  # type: ignore
+        Union[
+            NDMask,
+            NDTextEntity,
+            NDPoint,
+            NDRectangle,
+            NDPolyline,
+            NDPolygon,
+        ]
+    ],
+): ...
+
+
+class NDAnnotation(
+    SpecialUnion,
+    Type[Union[NDTool, NDClassification]],  # type: ignore
+):
+    @classmethod
+    def build(cls: Any, data) -> "NDBase":
+        if not isinstance(data, dict):
+            raise ValueError("value must be dict")
+        errors = []
+        for cl in cls.get_union_types():
+            try:
+                return cl(**data)
+            except KeyError as e:
+                errors.append(f"{cl.__name__}: {e}")
+
+        raise ValueError(
+            "Unable to construct any annotation.\n{}".format("\n".join(errors))
+        )
+
+    @classmethod
+    def schema(cls):
+        data = {"definitions": {}}
+        for type_ in cls.get_union_types():
+            schema_ = type_.schema()
+            data["definitions"].update(schema_.pop("definitions"))
+            data[type_.__name__] = schema_
+        return data
diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py
index dfc87c8a4..6f8aebc58 100644
--- a/libs/labelbox/src/labelbox/schema/enums.py
+++ b/libs/labelbox/src/labelbox/schema/enums.py
@@ -1,6 +1,31 @@
 from enum import Enum
 
 
+class BulkImportRequestState(Enum):
+    """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
+
+    If you are not using MEA, continue using BulkImportRequest.
+    AnnotationImports are in beta and will change soon.
+
+    .. list-table::
+       :widths: 15 150
+       :header-rows: 1
+
+       * - State
+         - Description
+       * - RUNNING
+         - Indicates that the import job is not done yet.
+       * - FAILED
+         - Indicates the import job failed. Check `BulkImportRequest.errors` for more information
+       * - FINISHED
+         - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information
+    """
+
+    RUNNING = "RUNNING"
+    FAILED = "FAILED"
+    FINISHED = "FINISHED"
+
+
 class AnnotationImportState(Enum):
     """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index ae97089a7..96b4cc6f8 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -9,17 +9,21 @@ TYPE_CHECKING, Any, Dict, + Iterable, List, Optional, Tuple, + TypeVar, Union, get_args, ) +from urllib.parse import urlparse from lbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, + ResourceConflict, ResourceNotFoundError, error_message_for_unparsed_graphql_error, ) # type: ignore @@ -35,6 +39,7 @@ from labelbox.schema.export_filters import ( ProjectExportFilters, build_filters, + validate_datetime, ) from labelbox.schema.export_params import ProjectExportParams from labelbox.schema.export_task import ExportTask @@ -54,6 +59,7 @@ from labelbox.schema.model_config import ModelConfig from labelbox.schema.ontology_kind import ( EditorTaskType, + OntologyKind, UploadType, ) from labelbox.schema.project_model_config import ProjectModelConfig @@ -66,7 +72,7 @@ from labelbox.schema.task_queue import TaskQueue if TYPE_CHECKING: - pass + from labelbox import BulkImportRequest DataRowPriority = int @@ -571,7 +577,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - "This function has only been tested to work with the Editor front end. Found %s", + f"This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -765,7 +771,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - "Batch exceeds max size, break into smaller batches" + f"Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1033,7 +1039,8 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Batch was not created successfully: " + json.dumps(task.errors) + f"Batch was not created successfully: " + + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) @@ -1255,7 +1262,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Priority was not updated successfully: " + f"Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1307,6 +1314,33 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] + def bulk_import_requests(self) -> PaginatedCollection: + """Returns bulk import request objects which are used in model-assisted labeling. + These are returned with the oldest first, and most recent last. + """ + + id_param = "project_id" + query_str = """query ListAllImportRequestsPyApi($%s: ID!) 
{ + bulkImportRequests ( + where: { projectId: $%s } + skip: %%d + first: %%d + ) { + %s + } + }""" % ( + id_param, + id_param, + query.results_query_part(Entity.BulkImportRequest), + ) + return PaginatedCollection( + self.client, + query_str, + {id_param: str(self.uid)}, + ["bulkImportRequests"], + Entity.BulkImportRequest, + ) + def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1408,7 +1442,7 @@ def move_data_rows_to_task_queue( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Data rows were not moved successfully: " + f"Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1418,6 +1452,77 @@ def _wait_for_task(self, task_id: str) -> Task: return task + def upload_annotations( + self, + name: str, + annotations: Union[str, Path, Iterable[Dict]], + validate: bool = False, + ) -> "BulkImportRequest": # type: ignore + """Uploads annotations to a new Editor project. + + Args: + name (str): name of the BulkImportRequest job + annotations (str or Path or Iterable): + url that is publicly accessible by Labelbox containing an + ndjson file + OR local path to an ndjson file + OR iterable of annotation rows + validate (bool): + Whether or not to validate the payload before uploading. + Returns: + BulkImportRequest + """ + + if isinstance(annotations, str) or isinstance(annotations, Path): + + def _is_url_valid(url: Union[str, Path]) -> bool: + """Verifies that the given string is a valid url. + + Args: + url: string to be checked + Returns: + True if the given url is valid otherwise False + + """ + if isinstance(url, Path): + return False + parsed = urlparse(url) + return bool(parsed.scheme) and bool(parsed.netloc) + + if _is_url_valid(annotations): + return Entity.BulkImportRequest.create_from_url( + client=self.client, + project_id=self.uid, + name=name, + url=str(annotations), + validate=validate, + ) + else: + path = Path(annotations) + if not path.exists(): + raise FileNotFoundError( + f"{annotations} is not a valid url nor existing local file" + ) + return Entity.BulkImportRequest.create_from_local_file( + client=self.client, + project_id=self.uid, + name=name, + file=path, + validate_file=validate, + ) + elif isinstance(annotations, Iterable): + return Entity.BulkImportRequest.create_from_objects( + client=self.client, + project_id=self.uid, + name=name, + predictions=annotations, # type: ignore + validate=validate, + ) + else: + raise ValueError( + f"Invalid annotations given of type: {type(annotations)}" + ) + def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py new file mode 100644 index 000000000..9abae1422 --- /dev/null +++ b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py @@ -0,0 +1,258 @@ +from unittest.mock import patch +import uuid +from labelbox import parser, Project +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +import pytest +import random +from labelbox.data.annotation_types.annotation import ObjectAnnotation +from labelbox.data.annotation_types.classification.classification import ( + Checklist, + ClassificationAnnotation, + ClassificationAnswer, + Radio, +) +from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.geometry.point import Point +from 
labelbox.data.annotation_types.geometry.rectangle import ( + Rectangle, + RectangleUnit, +) +from labelbox.data.annotation_types.label import Label +from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.ner import ( + DocumentEntity, + DocumentTextSelection, +) +from labelbox.data.annotation_types.video import VideoObjectAnnotation + +from labelbox.data.serialization import NDJsonConverter +from labelbox.exceptions import MALValidationError, UuidError +from labelbox.schema.bulk_import_request import BulkImportRequest +from labelbox.schema.enums import BulkImportRequestState +from labelbox.schema.annotation_import import LabelImport, MALPredictionImport +from labelbox.schema.media_type import MediaType + +""" +- Here we only want to check that the uploads are calling the validation +- Then with unit tests we can check the types of errors raised +""" +# TODO: remove library once bulk import requests are removed + + +@pytest.mark.order(1) +def test_create_from_url(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_file(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + with pytest.raises(MALValidationError): + module_project.upload_annotations( + name=name, annotations=url, validate=True + ) + # Schema ids shouldn't match + + +def test_create_from_objects( + module_project: Project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_create_from_label_objects( + module_project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + labels = list(NDJsonConverter.deserialize(predictions)) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=labels + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + normalized_predictions = list(NDJsonConverter.serialize(labels)) + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, normalized_predictions + ) + + +def test_create_from_local_file( + tmp_path, predictions, module_project, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + file_name = f"{name}.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + parser.dump(predictions, f) 
+ + bulk_import_request = module_project.upload_annotations( + name=name, annotations=str(file_path), validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_get(client, module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + bulk_import_request = BulkImportRequest.from_name( + client, project_id=module_project.uid, name=name + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_ndjson(tmp_path, module_project): + file_name = f"broken.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + f.write("test") + + with pytest.raises(ValueError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + +def test_validate_ndjson_uuid(tmp_path, module_project, predictions): + file_name = f"repeat_uuid.ndjson" + file_path = tmp_path / file_name + repeat_uuid = predictions.copy() + uid = str(uuid.uuid4()) + repeat_uuid[0]["uuid"] = uid + repeat_uuid[1]["uuid"] = uid + + with file_path.open("w") as f: + parser.dump(repeat_uuid, f) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=repeat_uuid + ) + + +@pytest.mark.skip( + "Slow test and uses a deprecated api endpoint for annotation imports" +) +def test_wait_till_done(rectangle_inference, project): + name = str(uuid.uuid4()) + url = project.client.upload_data( + content=parser.dumps(rectangle_inference), sign=True + ) + bulk_import_request = project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert len(bulk_import_request.inputs) == 1 + bulk_import_request.wait_until_done() + assert bulk_import_request.state == BulkImportRequestState.FINISHED + + # Check that the status files are being returned as expected + assert len(bulk_import_request.errors) == 0 + assert len(bulk_import_request.inputs) == 1 + assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] + assert len(bulk_import_request.statuses) == 1 + assert bulk_import_request.statuses[0]["status"] == "SUCCESS" + assert ( + bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] + ) + + +def test_project_bulk_import_requests(module_project, predictions): + result = module_project.bulk_import_requests() + assert len(list(result)) == 0 + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() 
+ + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + result = module_project.bulk_import_requests() + assert len(list(result)) == 3 + + +def test_delete(module_project, predictions): + name = str(uuid.uuid4()) + + bulk_import_requests = module_project.bulk_import_requests() + [ + bulk_import_request.delete() + for bulk_import_request in bulk_import_requests + ] + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 1 + + bulk_import_request.delete() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json new file mode 100644 index 000000000..4de15e217 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json @@ -0,0 +1,54 @@ +[ + { + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "confidence": 0.8, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" + }, + { + "answer": [ + { + "schemaId": "ckrb1sfl8099e0y919v260awv", + "confidence": 0.82, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } + ], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json new file mode 100644 index 000000000..83a95e5bf --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json @@ -0,0 +1,25 @@ +[{ + "location": { + "start": 67, + "end": 128 + }, + "messageId": "some-message-id", + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] +}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 75fe36e44..91563b8ae 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,17 +8,16 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - }, - 
"classifications": [] + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -29,17 +28,20 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - }, - "classifications": [] + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -48,39 +50,762 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + 
"x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 
}, { - "x": 15.0, - "y": 20.0 + "x": 1119, + "y": 934 }, { - "x": 20.0, - "y": 25.0 + "x": 1118, + "y": 935 }, { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json new file mode 100644 index 000000000..591e40cf6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json @@ -0,0 +1,823 @@ +[ + { + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + "schemaId": "ckrazcueb16og0z6609jj7y3y", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + { + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + "schemaId": "ckrazcuec16ok0z66f956apb7", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.834, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } + }, + { + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + "schemaId": "ckrazcuec16oi0z66dzrd8pfl", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.986, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "polygon": [ + { + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + 
{ + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 
917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 + } + ] + }, + { + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + "schemaId": "ckrazcuec16om0z66bhhh4tp7", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "point": { + "x": 2122, + "y": 1457 + } + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 466a03594..82be4cdab 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,86 +1,826 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "ckrazcueb16og0z6609jj7y3y", + "name": "box a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": [], - "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "confidence": 0.854, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.7 } - ], - "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - } + ] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "ckrazcuec16ok0z66f956apb7", + "name": "mask a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": [], - "confidence": 0.834, + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + }, + "confidence": 0.685, "customMetrics": [ { "name": "customMetric1", - "value": 0.3 + "value": 0.4 + }, + { + "name": "customMetric2", + "value": 0.9 } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - } + ] }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "ckrazcuec16oi0z66dzrd8pfl", + "name": "polygon a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.986, + "confidence": 0.71, "customMetrics": [ { "name": "customMetric1", - "value": 0.9 + "value": 0.1 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 
1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + 
{ + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 }, { - "x": 15.0, - "y": 20.0 + "x": 1099, + "y": 911 }, { - "x": 20.0, - "y": 25.0 + "x": 1100, + "y": 911 }, { - "x": 10.0, - "y": 20.0 + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "ckrazcuec16om0z66bhhh4tp7", + "name": "point a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, + "confidence": 0.77, + "customMetrics": [ + { + "name": "customMetric2", + "value": 1.2 + } + ], "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json new file mode 100644 index 000000000..31be5a4c7 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json @@ -0,0 +1,10 @@ +[ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "aggregation": "ARITHMETIC_MEAN", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "metricValue": 0.1 + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json new file mode 100644 
index 000000000..f4b4894f6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json @@ -0,0 +1,155 @@ +[{ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 4, + "unit": "POINTS", + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 162.73, + "left": 32.45, + "height": 388.16999999999996, + "width": 101.66000000000001 + } +}, { + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "bbox": { + "top": 223.26, + "left": 251.42, + "height": 457.03999999999996, + "width": 186.78 + } +}, { + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 6, + "unit": "POINTS", + "confidence": 0.99, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 32.52, + "left": 218.17, + "height": 231.73, + "width": 110.56000000000003 + } +}, { + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "confidence": 0.89, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 117.39, + "left": 4.25, + "height": 456.9200000000001, + "width": 164.83 + } +}, { + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 8, + "unit": "POINTS", + "bbox": { + "top": 82.13, + "left": 217.28, + "height": 279.76, + "width": 82.43000000000004 + } +}, { + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 3, + "unit": "POINTS", + "bbox": { + "top": 298.12, + "left": 83.34, + "height": 203.83000000000004, + "width": 0.37999999999999545 + } +}, +{ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "named_entity", + "classifications": [], + "textSelections": [ + { + "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + "tokenIds": [ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c" + ], + "page": 1 + } + ] +} +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json new file mode 100644 index 
000000000..d6a9eecbd --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json @@ -0,0 +1,36 @@ +[ + { + "line": [ + { + "x": 2534.353, + "y": 249.471 + }, + { + "x": 2429.492, + "y": 182.092 + }, + { + "x": 2294.322, + "y": 221.962 + } + ], + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-line", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.58, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json new file mode 100644 index 000000000..1f26d8dc8 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json @@ -0,0 +1,26 @@ +[ + { + "location": { + "start": 67, + "end": 128 + }, + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json new file mode 100644 index 000000000..11e0753d9 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json @@ -0,0 +1,166 @@ +[{ + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" + }, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{ + "start": 30, + "end": 35 + }, { + "start": 50, + "end": 51 + }] +}, { + "answer": [{ + "schemaId": "ckrb1sfl8099e0y919v260awv" + }], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{ + "start": 0, + "end": 5 + }] +}, { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" +}, { + "classifications": [], + "schemaId": + "cl5islwg200gfci6g0oitaypu", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [{ + "keyframes": [{ + "frame": 1, + "line": [{ + "x": 10.0, + "y": 10.0 + }, { + "x": 100.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }, { + "frame": 5, + "line": [{ + "x": 15.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 8, + "line": [{ + "x": 100.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [{ + "keyframes": [{ + "frame": 1, + "point": { + "x": 10.0, 
+ "y": 10.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 5, + "point": { + "x": 50.0, + "y": 50.0 + }, + "classifications": [] + }, { + "frame": 10, + "point": { + "x": 10.0, + "y": 50.0 + }, + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5iw0roz00lwci6g5jni62vs", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [{ + "keyframes": [{ + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0 + }, + "classifications": [] + }, { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0 + }, + "classifications": [] + }] + }] +}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index fb78916f4..c9e9bcdb9 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -36,6 +36,13 @@ def test_serialization_min(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + for i, annotation in enumerate(res.annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_serialization_with_classification(): label = Label( @@ -125,6 +132,12 @@ def test_serialization_with_classification(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + assert label.model_dump(exclude_none=True) == label.model_dump( + exclude_none=True + ) + def test_serialization_with_classification_double_nested(): label = Label( @@ -217,6 +230,13 @@ def test_serialization_with_classification_double_nested(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert label.model_dump(exclude_none=True) == label.model_dump( + exclude_none=True + ) + def test_serialization_with_classification_double_nested_2(): label = Label( @@ -306,3 +326,9 @@ def test_serialization_with_classification_double_nested_2(): res = next(serialized) res.pop("uuid") assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + assert label.model_dump(exclude_none=True) == label.model_dump( + exclude_none=True + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py index 82adce99c..8dcb17f0b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py @@ -1,73 +1,15 @@ import json -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - Radio, - Text, -) -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ClassificationAnnotation, - ClassificationAnswer, -) -from labelbox.data.mixins import CustomMetric - def test_classification(): with open( 
"tests/data/assets/ndjson/classification_import.json", "r" ) as file: data = json.load(file) - - label = Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.8, - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ), - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.82, - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - ), - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"}, - value=Text(answer="a value"), - ), - ], - ) - - res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -76,48 +18,6 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - label = Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - name="classification a", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="choice 1", - ), - ), - ), - ClassificationAnnotation( - name="classification b", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.945, - name="choice 2", - ) - ], - ), - ), - ClassificationAnnotation( - name="classification c", - extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, - value=Text(answer="a value"), - ), - ], - ) - - res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 5aa7285e2..3269d9c96 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,12 +1,8 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -103,62 +99,25 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - -def test_conversation_entity_import(): - with open( - "tests/data/assets/ndjson/conversation_entity_import.json", "r" - ) as file: - data = json.load(file) - - label = 
lb_types.Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - lb_types.ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, - value=lb_types.ConversationEntity( - start=67, end=128, message_id="some-message-id" - ), - ) - ], - ) - - res = list(NDJsonConverter.serialize([label])) - assert res == data + deserialized_label = list(NDJsonConverter().deserialize(ndjson)) + deserialized_label[0].annotations[0].extra.pop("uuid") + assert deserialized_label[0].model_dump(exclude_none=True) == label[ + 0 + ].model_dump(exclude_none=True) -def test_conversation_entity_import_without_confidence(): - with open( +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/conversation_entity_import.json", "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_conversation_entity_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - label = lb_types.Label( - uid=None, - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - lb_types.ObjectAnnotation( - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, - value=lb_types.ConversationEntity( - start=67, end=128, extra={}, message_id="some-message-id" - ), - ) - ], - ) - - res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py index 999e1bda5..333c00250 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py @@ -1,29 +1,67 @@ +from copy import copy +import pytest import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter +from labelbox.data.serialization.ndjson.objects import ( + NDDicomSegments, + NDDicomSegment, + NDDicomLine, +) + +""" +Data gen prompt test data +""" + +prompt_text_annotation = lb_types.PromptClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + name="test", + value=lb_types.PromptText( + answer="the answer to the text questions right here" + ), +) + +prompt_text_ndjson = { + "answer": "the answer to the text questions right here", + "name": "test", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, +} + +data_gen_label = lb_types.Label( + data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + annotations=[prompt_text_annotation], +) + +""" +Prompt annotation test +""" def test_serialize_label(): - prompt_text_annotation = lb_types.PromptClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - name="test", - extra={"uuid": "test"}, - value=lb_types.PromptText( - answer="the answer to the text questions right here" - ), - ) + serialized_label = next(NDJsonConverter().serialize([data_gen_label])) + # Remove uuid field since this is a random value that can not be specified also meant for relationships + del serialized_label["uuid"] + assert serialized_label == prompt_text_ndjson + - prompt_text_ndjson = { - "answer": "the 
answer to the text questions right here", - "name": "test", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "test", - } - - data_gen_label = lb_types.Label( - data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - annotations=[prompt_text_annotation], +def test_deserialize_label(): + deserialized_label = next( + NDJsonConverter().deserialize([prompt_text_ndjson]) ) - serialized_label = next(NDJsonConverter().serialize([data_gen_label])) + if hasattr(deserialized_label.annotations[0], "extra"): + # Extra fields are added to deserialized label by default need removed to match + deserialized_label.annotations[0].extra = {} + assert deserialized_label.model_dump( + exclude_none=True + ) == data_gen_label.model_dump(exclude_none=True) - assert serialized_label == prompt_text_ndjson + +def test_serialize_deserialize_label(): + serialized = list(NDJsonConverter.serialize([data_gen_label])) + deserialized = next(NDJsonConverter.deserialize(serialized)) + if hasattr(deserialized.annotations[0], "extra"): + # Extra fields are added to deserialized label by default need removed to match + deserialized.annotations[0].extra = {} + assert deserialized.model_dump( + exclude_none=True + ) == data_gen_label.model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 6a00fa871..4ea0586c7 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,5 +1,6 @@ from copy import copy import pytest +import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -182,3 +183,28 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson + + +@pytest.mark.parametrize("label, ndjson", labels_ndjsons) +def test_deserialize_label(label, ndjson): + deserialized_label = next(NDJsonConverter().deserialize([ndjson])) + if hasattr(deserialized_label.annotations[0], "extra"): + deserialized_label.annotations[0].extra = {} + for i, annotation in enumerate(deserialized_label.annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value + + +@pytest.mark.parametrize("label", labels) +def test_serialize_deserialize_label(label): + serialized = list(NDJsonConverter.serialize([label])) + deserialized = list(NDJsonConverter.deserialize(serialized)) + if hasattr(deserialized[0].annotations[0], "extra"): + deserialized[0].annotations[0].extra = {} + for i, annotation in enumerate(deserialized[0].annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index fcdf4368b..b00182275 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,19 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import 
labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - RectangleUnit, - Point, - DocumentRectangle, - DocumentEntity, - DocumentTextSelection, -) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -66,144 +53,10 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - labels = [ - Label( - uid=None, - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.89, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - 
) - ] - ), - ) - ], - ), - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_with_name_only(): @@ -212,135 +65,26 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id=None, - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.74, - name="boxy", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - ) - ] - ), - ) - ], - ), - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson + + +def test_pdf_bbox_deserialize(): + deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) + 
deserialized[0].annotations[0].extra = {} + assert ( + deserialized[0].annotations[0].value + == bbox_labels[0].annotations[0].value + ) + assert ( + deserialized[0].annotations[0].name + == bbox_labels[0].annotations[0].name + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index a0cd13e81..d6efab3ee 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,6 +9,8 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=GenericDataRowData( uid="cklr9mr4m5iao0rb6cvxu4qbn", + file_path=None, + frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -20,7 +22,6 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", - "uuid": "b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, value=Rectangle( extra={}, @@ -587,4 +588,31 @@ def test_serialize_video_objects(): serialized_labels = NDJsonConverter.serialize([label]) label = next(serialized_labels) - assert label == video_serialized_bbox_label() + manual_label = video_serialized_bbox_label() + + for key in label.keys(): + # ignore uuid because we randomize if there was none + if key != "uuid": + assert label[key] == manual_label[key] + + assert len(label["segments"]) == 2 + assert len(label["segments"][0]["keyframes"]) == 2 + assert len(label["segments"][1]["keyframes"]) == 4 + + # #converts back only the keyframes. should be the sum of all prev segments + deserialized_labels = NDJsonConverter.deserialize([label]) + label = next(deserialized_labels) + assert len(label.annotations) == 6 + + +def test_confidence_is_ignored(): + label = video_bbox_label() + serialized_labels = NDJsonConverter.serialize([label]) + label = next(serialized_labels) + label["confidence"] = 0.453 + label["segments"][0]["confidence"] = 0.453 + + deserialized_labels = NDJsonConverter.deserialize([label]) + label = next(deserialized_labels) + for annotation in label.annotations: + assert annotation.confidence is None diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 7b03a8447..7daf17188 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -34,6 +34,16 @@ def test_serialization(): assert res["answer"] == "text_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + + annotation = res.annotations[0] + + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "text_answer" + assert annotation_value.confidence == 0.5 + def test_nested_serialization(): label = Label( @@ -92,3 +102,19 @@ def test_nested_serialization(): assert sub_classification["name"] == "nested answer" assert sub_classification["answer"] == "nested answer" assert sub_classification["confidence"] == 0.7 + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + answer = annotation.value.answer[0] + assert answer.confidence == 0.9 + assert answer.name == 
"first_answer" + + classification_answer = answer.classifications[0].value.answer + assert classification_answer.confidence == 0.8 + assert classification_answer.name == "first_sub_radio_answer" + + sub_classification_answer = classification_answer.classifications[0].value + assert type(sub_classification_answer) is Text + assert sub_classification_answer.answer == "nested answer" + assert sub_classification_answer.confidence == 0.7 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py index d104a691e..2b3fa7f8c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py @@ -1,74 +1,73 @@ -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +import json +import pytest + +from labelbox.data.serialization.ndjson.classification import NDRadio + from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ClassificationAnnotation, - Radio, - ClassificationAnswer, -) +from labelbox.data.serialization.ndjson.objects import NDLine -def test_generic_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def round_dict(data): + if isinstance(data, dict): + for key in data: + if isinstance(data[key], float): + data[key] = int(data[key]) + elif isinstance(data[key], dict): + data[key] = round_dict(data[key]) + elif isinstance(data[key], (list, tuple)): + data[key] = [round_dict(r) for r in data[key]] - label = Label( - data=GenericDataRowData( - global_key="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) + return data + + +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/classification_import_global_key.json", + "tests/data/assets/ndjson/metric_import_global_key.json", + "tests/data/assets/ndjson/polyline_import_global_key.json", + "tests/data/assets/ndjson/text_entity_import_global_key.json", + "tests/data/assets/ndjson/conversation_entity_import_global_key.json", + ], +) +def test_many_types(filename: str): + with open(filename, "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == data + f.close() - res = list(NDJsonConverter.serialize([label])) - assert res == expected +def test_image(): + with open( + "tests/data/assets/ndjson/image_import_global_key.json", "r" + ) as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() -def test_dict_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def test_pdf(): + with 
open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() - label = Label( - data={ - "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct", - }, - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) - res = list(NDJsonConverter.serialize([label])) +def test_video(): + with open( + "tests/data/assets/ndjson/video_import_global_key.json", "r" + ) as f: + data = json.load(f) - assert res == expected + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] + f.close() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index 4d615658c..94198999f 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,8 +1,4 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -13,7 +9,6 @@ ObjectAnnotation, MaskData, ) -from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -33,74 +28,12 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.9) - ], - confidence=0.986, - feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] 
== [round_dict(x) for x in data] def test_image_with_name_only(): @@ -109,74 +42,11 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - name="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - name="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.9) - ], - confidence=0.986, - name="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - name="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask(): @@ -186,11 +56,10 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], - "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -198,54 +67,16 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": (255, 0, 0), + "colorRGB": [255, 0, 0], }, - "classifications": [], }, ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) - mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) - mask_numpy = mask_numpy.astype(np.uint8) - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.8, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Mask( - mask=MaskData(arr=mask_numpy), - 
color=(1, 1, 1), - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - extra={}, - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=(255, 0, 0), - ), - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask_from_arr(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 40e098405..45c5c67bf 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,166 +1,38 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.annotation_types.metrics.confusion_matrix import ( - ConfusionMatrixMetric, -) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ScalarMetric, - ScalarMetricAggregation, - ConfusionMatrixAggregation, -) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ScalarMetric( - value=0.1, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - assert res == data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert reserialized == data def test_custom_scalar_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: 0.1, 0.2: 0.5}, - "metricName": "custom_iou", - "aggregation": "SUM", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ScalarMetric( - value=0.1, - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value=0.1, - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value={"0.1": 0.1, "0.2": 0.5}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_scalar_import.json", "r" + ) as file: + data = json.load(file) - assert res 
== data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) def test_custom_confusion_matrix_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (1, 1, 2, 3), - "metricName": "50%_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (0, 1, 2, 5), - "metricName": "50%_iou", - "featureName": "sample_class", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - "metricName": "50%_iou", - "aggregation": "CONFUSION_MATRIX", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ConfusionMatrixMetric( - value=(1, 1, 2, 3), - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value=(0, 1, 2, 5), - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" + ) as file: + data = json.load(file) - assert data == res + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 202f793fe..69594ff73 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,125 +1,32 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest from labelbox.data.serialization import NDJsonConverter -from labelbox.types import ( - Label, - MessageEvaluationTaskAnnotation, - MessageSingleSelectionTask, - MessageMultiSelectionTask, - MessageInfo, - OrderedMessageInfo, - MessageRankingTask, -) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cnjencjencjfencvj", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="single-selection", - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, - value=MessageSingleSelectionTask( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - parent_message_id="clxfznjb800073b6v43ppx9ca", - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cfcerfvergerfefj", - ), - annotations=[ - 
MessageEvaluationTaskAnnotation( - name="multi-selection", - extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, - value=MessageMultiSelectionTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - selected_messages=[ - MessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - ) - ], - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="ranking", - extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=2, - ), - ], - ), - ) - ], - ), - ] + deserialized = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(deserialized)) - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert data == reserialized def test_mesage_ranking_task_wrong_order_serialization(): + with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: + data = json.load(file) + + some_ranking_task = next( + task + for task in data + if task["messageEvaluationTask"]["format"] == "message-ranking" + ) + some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ + "order" + ] = 3 + with pytest.raises(ValueError): - ( - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="ranking", - extra={ - "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" - }, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - ], - ), - ) - ], - ), - ) + list(NDJsonConverter.deserialize([some_ranking_task])) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py new file mode 100644 index 000000000..790bd87b3 --- /dev/null +++ b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py @@ -0,0 +1,19 @@ +import json +from labelbox.data.serialization.ndjson.label import NDLabel +from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle +import pytest + + +def test_bad_annotation_input(): + data = [{"test": 3}] + with pytest.raises(ValueError): + NDLabel(**{"annotations": data}) + + +def test_correct_annotation_input(): + with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: + data = json.load(f) + assert isinstance( + NDLabel(**{"annotations": [data[0]]}).annotations[0], + NDDocumentRectangle, + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index 3633c9cbe..e0f0df0e6 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,135 +1,13 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric from 
labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Rectangle, - Point, - ClassificationAnnotation, - Radio, - ClassificationAnswer, - Text, - Checklist, -) def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.34, - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "5d03213e-4408-456c-9eca-cf0723202961", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.894, - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={}, - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -138,112 +16,6 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="box a", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.811, - name="first answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box b", - extra={ - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - 
start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification b", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.815, - name="second answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification c", - value=Checklist( - answer=[ - ClassificationAnswer( - name="third answer", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="a string", - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index cd11d97fe..97d48a14e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,76 +1,18 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ObjectAnnotation, Point, Line, Label - - -def test_polyline_import_with_confidence(): - with open( - "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" - ) as file: - data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-line", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_polyline_import_without_confidence(): - with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/polyline_without_confidence_import.json", + "tests/data/assets/ndjson/polyline_import.json", + ], +) +def test_polyline_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.58, - name="some-line", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = 
list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index ec57f0528..2b2ade5d6 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,3 +1,4 @@ +import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -38,6 +39,14 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + + for i, annotation in enumerate(res.annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_serialization_with_radio_classification(): label = Label( @@ -90,3 +99,10 @@ def test_serialization_with_radio_classification(): res = next(serialized) res.pop("uuid") assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.annotations[0].model_dump( + exclude_none=True + ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 0e42ab152..66630dbb5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,10 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -12,26 +8,8 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="bbox", - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - }, - value=Rectangle( - start=Point(x=38.0, y=28.0), - end=Point(x=81.0, y=69.0), - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -61,6 +39,8 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, }, ) @@ -68,9 +48,8 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - data = list(NDJsonConverter.serialize([label])) - - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def test_rectangle_mixed_start_end_points(): @@ -97,13 +76,17 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, + }, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - data = 
list(NDJsonConverter.serialize([label])) - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index 235b66957..f33719035 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,135 +1,16 @@ import json +from uuid import uuid4 -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Point, - Rectangle, - RelationshipAnnotation, - Relationship, -) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = [ - Label( - data=GenericDataRowData( - uid="clf98gj90000qp38ka34yhptl", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - extra={}, - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="clf98gj90000qp38ka34yhptl-DIFFERENT", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - ] + res = list(NDJsonConverter.deserialize(data)) res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -163,3 +44,29 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] + + +def test_relationship_nonexistent_object(): + with 
open("tests/data/assets/ndjson/relationship_import.json", "r") as file: + data = json.load(file) + + relationship_annotation = data[2] + source_uuid = relationship_annotation["relationship"]["source"] + target_uuid = str(uuid4()) + relationship_annotation["relationship"]["target"] = target_uuid + error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" + + with pytest.raises(ValueError, match=error_msg): + list(NDJsonConverter.deserialize(data)) + + +def test_relationship_duplicate_uuids(): + with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: + data = json.load(file) + + source, target = data[0], data[1] + target["uuid"] = source["uuid"] + error_msg = f"UUID '{source['uuid']}' is not unique" + + with pytest.raises(AssertionError, match=error_msg): + list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 28eba07bd..83ac0da68 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,5 +1,7 @@ from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( + ClassificationAnswer, + Radio, Text, ) from labelbox.data.annotation_types.data import GenericDataRowData @@ -31,3 +33,11 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index fb93f15d4..3e856f001 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,68 +1,21 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, TextEntity - - -def test_text_entity_import(): - with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: - data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_text_entity_import_without_confidence(): - with open( +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/text_entity_import.json", "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_text_entity_import(filename: str): + with 
open(filename, "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index c0047412d..b0e277d9d 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -6,7 +6,6 @@ ClassificationAnnotation, ClassificationAnswer, Radio, - Text, ) from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.geometry.line import Line @@ -292,7 +291,10 @@ def test_video(): data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - assert data == res + + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_name_only(): @@ -567,7 +569,9 @@ def test_video_name_only(): data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - assert data == res + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_classification_global_subclassifications(): @@ -585,6 +589,7 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( + name="checklist", answer=[ ClassificationAnswer( name="first_checklist_answer", @@ -611,7 +616,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = { + expected_second_annotation = nested_checklist_annotation_ndjson = { "name": "nested_checklist_question", "answer": [ { @@ -633,6 +638,12 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + assert annotation.name == label.annotations[i].name + def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -798,6 +809,14 @@ def test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_video_classification_point(): bbox_annotation = [ @@ -948,6 +967,13 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + def test_video_classification_frameline(): bbox_annotation = [ @@ -1115,289 +1141,9 @@ def test_video_classification_frameline(): res = [x for x in 
serialized] assert res == expected - -[ - { - "answer": "a value", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 5, "start": 0}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5islwg200gfci6g0oitaypu", - "segments": [ - { - "keyframes": [ - { - "classifications": [], - "frame": 1, - "line": [ - {"x": 10.0, "y": 10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - { - "classifications": [], - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - } - ] - }, - ], - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "segments": [ - { - "keyframes": [ - { - "classifications": [], - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - } - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - }, - { - "classifications": [], - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - }, - ] - }, - ], - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "segments": [ - { - "keyframes": [ - { - "bbox": { - "height": 100.0, - "left": 5.0, - "top": 10.0, - "width": 150.0, - }, - "classifications": [], - "frame": 1, - }, - { - "bbox": { - "height": 50.0, - "left": 5.0, - "top": 30.0, - "width": 150.0, - }, - "classifications": [], - "frame": 5, - }, - ] - }, - { - "keyframes": [ - { - "bbox": { - "height": 400.0, - "left": 200.0, - "top": 300.0, - "width": 150.0, - }, - "classifications": [], - "frame": 10, - } - ] - }, - ], - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - }, -] - -[ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], - }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{"start": 0, "end": 5}], - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "classifications": [], - "schemaId": "cl5islwg200gfci6g0oitaypu", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "line": [ - {"x": 10.0, "y": 
10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - { - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - } - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - "classifications": [], - } - ] - }, - { - "keyframes": [ - { - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - "classifications": [], - }, - { - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - "classifications": [], - }, - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0, - }, - "classifications": [], - }, - { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0, - }, - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0, - }, - "classifications": [], - } - ] - }, - ], - }, -] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value From 3aa807addecc5cc0f7c90650bf8fe94f344d166f Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 23 Sep 2024 11:53:40 -0700 Subject: [PATCH 05/15] Fix exception type for labeling service test (#1835) --- libs/labelbox/tests/integration/test_labeling_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py index 9d5e178b6..fecd75518 100644 --- a/libs/labelbox/tests/integration/test_labeling_service.py +++ b/libs/labelbox/tests/integration/test_labeling_service.py @@ -55,7 +55,7 @@ def test_request_labeling_service_moe_project( labeling_service = project.get_labeling_service() with pytest.raises( - LabelboxError, + MalformedQueryException, match='[{"errorType":"PROJECT_MODEL_CONFIG","errorMessage":"Project model config is not completed"}]', ): labeling_service.request() @@ -77,5 +77,5 @@ def test_request_labeling_service_incomplete_requirements(ontology, project): ): # No labeling service by default labeling_service.request() project.connect_ontology(ontology) - with pytest.raises(LabelboxError): + with pytest.raises(MalformedQueryException): labeling_service.request() From 8766f2f0a21b80f30c2904dd33966bf965521778 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 27 Sep 2024 09:45:10 -0700 Subject: [PATCH 06/15] [PLT-1492] Fix 'flaky' tests spotted during a prod run (#1846) --- libs/labelbox/tests/conftest.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py index 41a81e94f..84908ac28 100644 --- a/libs/labelbox/tests/conftest.py +++ b/libs/labelbox/tests/conftest.py @@ 
-2,17 +2,8 @@ import os import re import time -<<<<<<< HEAD import uuid from datetime import datetime -======= -from labelbox.schema.project import Project -import requests -from labelbox.schema.ontology import Ontology -import pytest -from types import SimpleNamespace -from typing import Type ->>>>>>> 0bbd7c29 (Vb/fix ontology leaks plt 1379 (#1814)) from enum import Enum from random import randint from string import ascii_letters From 1f744af0ebb8b8edab8512f7cc8443739bba2096 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 1 Oct 2024 14:26:29 -0700 Subject: [PATCH 07/15] [PLT-1611] Vb/placeholder datarows (#1851) --- libs/labelbox/tests/integration/conftest.py | 19 +- .../tests/integration/test_data_rows.py | 166 ++++++++++-------- .../tests/integration/test_mmc_data_rows.py | 58 ++++++ 3 files changed, 171 insertions(+), 72 deletions(-) create mode 100644 libs/labelbox/tests/integration/test_mmc_data_rows.py diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index 45056dfb4..211d767b6 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -11,7 +11,6 @@ from typing import List, Tuple, Type import pytest -import requests from labelbox import ( Classification, @@ -832,3 +831,21 @@ def print_perf_summary(): for aaa in islice(sorted_dict, num_of_entries) ] print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) + + +@pytest.fixture +def make_metadata_fields(constants): + msg = "A message" + time = datetime.now(timezone.utc) + + fields = [ + DataRowMetadataField( + schema_id=constants["SPLIT_SCHEMA_ID"], + value=constants["TEST_SPLIT_ID"], + ), + DataRowMetadataField( + schema_id=constants["CAPTURE_DT_SCHEMA_ID"], value=time + ), + DataRowMetadataField(schema_id=constants["TEXT_SCHEMA_ID"], value=msg), + ] + return fields diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index 481385e75..b060d5662 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -21,24 +21,17 @@ from labelbox.schema.media_type import MediaType from labelbox.schema.task import Task -SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal" -TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt" -TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh" -CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb" -EXPECTED_METADATA_SCHEMA_IDS = [ - SPLIT_SCHEMA_ID, - TEST_SPLIT_ID, - TEXT_SCHEMA_ID, - CAPTURE_DT_SCHEMA_ID, -].sort() -CUSTOM_TEXT_SCHEMA_NAME = "custom_text" - @pytest.fixture -def mdo(client): +def mdo( + client, + constants, +): mdo = client.get_data_row_metadata_ontology() try: - mdo.create_schema(CUSTOM_TEXT_SCHEMA_NAME, DataRowMetadataKind.string) + mdo.create_schema( + constants["CUSTOM_TEXT_SCHEMA_NAME"], DataRowMetadataKind.string + ) except MalformedQueryException: # Do nothing if already exists pass @@ -93,26 +86,18 @@ def tile_content(): } -def make_metadata_fields(): - msg = "A message" - time = datetime.now(timezone.utc) - - fields = [ - DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID, value=TEST_SPLIT_ID), - DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time), - DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg), - ] - return fields - - -def make_metadata_fields_dict(): +@pytest.fixture +def make_metadata_fields_dict(constants): msg = "A message" time = datetime.now(timezone.utc) fields = [ - {"schema_id": SPLIT_SCHEMA_ID, "value": TEST_SPLIT_ID}, - {"schema_id": 
CAPTURE_DT_SCHEMA_ID, "value": time}, - {"schema_id": TEXT_SCHEMA_ID, "value": msg}, + { + "schema_id": constants["SPLIT_SCHEMA_ID"], + "value": constants["TEST_SPLIT_ID"], + }, + {"schema_id": constants["CAPTURE_DT_SCHEMA_ID"], "value": time}, + {"schema_id": constants["TEXT_SCHEMA_ID"], "value": msg}, ] return fields @@ -375,15 +360,22 @@ def test_create_data_row_with_invalid_input(dataset, image_url): dataset.create_data_row("asdf") -def test_create_data_row_with_metadata(mdo, dataset, image_url): +def test_create_data_row_with_metadata( + mdo, + dataset, + image_url, + make_metadata_fields, + constants, + make_metadata_fields_dict, +): client = dataset.client assert len(list(dataset.data_rows())) == 0 data_row = dataset.create_data_row( - row_data=image_url, metadata_fields=make_metadata_fields() + row_data=image_url, metadata_fields=make_metadata_fields ) - assert len(list(dataset.data_rows())) == 1 + assert len([dr for dr in dataset.data_rows()]) == 1 assert data_row.dataset() == dataset assert data_row.created_by() == client.get_user() assert data_row.organization() == client.get_organization() @@ -396,19 +388,21 @@ def test_create_data_row_with_metadata(mdo, dataset, image_url): metadata = data_row.metadata assert len(metadata_fields) == 3 assert len(metadata) == 3 - assert [ - m["schemaId"] for m in metadata_fields - ].sort() == EXPECTED_METADATA_SCHEMA_IDS + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() for m in metadata: assert mdo._parse_upsert(m) -def test_create_data_row_with_metadata_dict(mdo, dataset, image_url): +def test_create_data_row_with_metadata_dict( + mdo, dataset, image_url, constants, make_metadata_fields_dict +): client = dataset.client assert len(list(dataset.data_rows())) == 0 data_row = dataset.create_data_row( - row_data=image_url, metadata_fields=make_metadata_fields_dict() + row_data=image_url, metadata_fields=make_metadata_fields_dict ) assert len(list(dataset.data_rows())) == 1 @@ -424,25 +418,36 @@ def test_create_data_row_with_metadata_dict(mdo, dataset, image_url): metadata = data_row.metadata assert len(metadata_fields) == 3 assert len(metadata) == 3 - assert [ - m["schemaId"] for m in metadata_fields - ].sort() == EXPECTED_METADATA_SCHEMA_IDS + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() for m in metadata: assert mdo._parse_upsert(m) -def test_create_data_row_with_invalid_metadata(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_row_with_invalid_metadata( + dataset, image_url, constants, make_metadata_fields +): + fields = make_metadata_fields # make the payload invalid by providing the same schema id more than once fields.append( - DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value="some msg") + DataRowMetadataField( + schema_id=constants["TEXT_SCHEMA_ID"], value="some msg" + ) ) with pytest.raises(ResourceCreationError): dataset.create_data_row(row_data=image_url, metadata_fields=fields) -def test_create_data_rows_with_metadata(mdo, dataset, image_url): +def test_create_data_rows_with_metadata( + mdo, + dataset, + image_url, + constants, + make_metadata_fields, + make_metadata_fields_dict, +): client = dataset.client assert len(list(dataset.data_rows())) == 0 @@ -451,22 +456,22 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): { DataRow.row_data: image_url, DataRow.external_id: "row1", - DataRow.metadata_fields: make_metadata_fields(), + DataRow.metadata_fields: 
make_metadata_fields, }, { DataRow.row_data: image_url, DataRow.external_id: "row2", - "metadata_fields": make_metadata_fields(), + "metadata_fields": make_metadata_fields, }, { DataRow.row_data: image_url, DataRow.external_id: "row3", - DataRow.metadata_fields: make_metadata_fields_dict(), + DataRow.metadata_fields: make_metadata_fields_dict, }, { DataRow.row_data: image_url, DataRow.external_id: "row4", - "metadata_fields": make_metadata_fields_dict(), + "metadata_fields": make_metadata_fields_dict, }, ] ) @@ -488,9 +493,9 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): metadata = row.metadata assert len(metadata_fields) == 3 assert len(metadata) == 3 - assert [ - m["schemaId"] for m in metadata_fields - ].sort() == EXPECTED_METADATA_SCHEMA_IDS + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() for m in metadata: assert mdo._parse_upsert(m) @@ -505,14 +510,16 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): ], ) def test_create_data_rows_with_named_metadata_field_class( - test_function, metadata_obj_type, mdo, dataset, image_url + test_function, metadata_obj_type, mdo, dataset, image_url, constants ): row_with_metadata_field = { DataRow.row_data: image_url, DataRow.external_id: "row1", DataRow.metadata_fields: [ DataRowMetadataField(name="split", value="test"), - DataRowMetadataField(name=CUSTOM_TEXT_SCHEMA_NAME, value="hello"), + DataRowMetadataField( + name=constants["CUSTOM_TEXT_SCHEMA_NAME"], value="hello" + ), ], } @@ -521,7 +528,7 @@ def test_create_data_rows_with_named_metadata_field_class( DataRow.external_id: "row2", "metadata_fields": [ {"name": "split", "value": "test"}, - {"name": CUSTOM_TEXT_SCHEMA_NAME, "value": "hello"}, + {"name": constants["CUSTOM_TEXT_SCHEMA_NAME"], "value": "hello"}, ], } @@ -552,21 +559,26 @@ def create_data_row(data_rows): assert len(created_rows[0].metadata) == 2 metadata = created_rows[0].metadata - assert metadata[0].schema_id == SPLIT_SCHEMA_ID + assert metadata[0].schema_id == constants["SPLIT_SCHEMA_ID"] assert metadata[0].name == "test" assert metadata[0].value == mdo.reserved_by_name["split"]["test"].uid - assert metadata[1].name == CUSTOM_TEXT_SCHEMA_NAME + assert metadata[1].name == constants["CUSTOM_TEXT_SCHEMA_NAME"] assert metadata[1].value == "hello" assert ( - metadata[1].schema_id == mdo.custom_by_name[CUSTOM_TEXT_SCHEMA_NAME].uid + metadata[1].schema_id + == mdo.custom_by_name[constants["CUSTOM_TEXT_SCHEMA_NAME"]].uid ) -def test_create_data_rows_with_invalid_metadata(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_invalid_metadata( + dataset, image_url, constants, make_metadata_fields +): + fields = make_metadata_fields # make the payload invalid by providing the same schema id more than once fields.append( - DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value="some msg") + DataRowMetadataField( + schema_id=constants["TEXT_SCHEMA_ID"], value="some msg" + ) ) task = dataset.create_data_rows( @@ -577,13 +589,15 @@ def test_create_data_rows_with_invalid_metadata(dataset, image_url): assert task.status == "COMPLETE" assert len(task.failed_data_rows) == 1 assert ( - f"A schemaId can only be specified once per DataRow : [{TEXT_SCHEMA_ID}]" + f"A schemaId can only be specified once per DataRow : [{constants['TEXT_SCHEMA_ID']}]" in task.failed_data_rows[0]["message"] ) -def test_create_data_rows_with_metadata_missing_value(dataset, image_url): - fields = make_metadata_fields() +def 
test_create_data_rows_with_metadata_missing_value( + dataset, image_url, make_metadata_fields +): + fields = make_metadata_fields fields.append({"schemaId": "some schema id"}) with pytest.raises(ValueError) as exc: @@ -598,8 +612,10 @@ def test_create_data_rows_with_metadata_missing_value(dataset, image_url): ) -def test_create_data_rows_with_metadata_missing_schema_id(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_metadata_missing_schema_id( + dataset, image_url, make_metadata_fields +): + fields = make_metadata_fields fields.append({"value": "some value"}) with pytest.raises(ValueError) as exc: @@ -614,8 +630,10 @@ def test_create_data_rows_with_metadata_missing_schema_id(dataset, image_url): ) -def test_create_data_rows_with_metadata_wrong_type(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_metadata_wrong_type( + dataset, image_url, make_metadata_fields +): + fields = make_metadata_fields fields.append("Neither DataRowMetadataField or dict") with pytest.raises(ValueError) as exc: @@ -899,7 +917,11 @@ def test_does_not_update_not_provided_attachment_fields(data_row): assert attachment.attachment_type == "RAW_TEXT" -def test_create_data_rows_result(client, dataset, image_url): +def test_create_data_rows_result( + client, + dataset, + image_url, +): task = dataset.create_data_rows( [ { @@ -918,12 +940,14 @@ def test_create_data_rows_result(client, dataset, image_url): client.get_data_row(result["id"]) -def test_create_data_rows_local_file(dataset, sample_image): +def test_create_data_rows_local_file( + dataset, sample_image, make_metadata_fields +): task = dataset.create_data_rows( [ { DataRow.row_data: sample_image, - DataRow.metadata_fields: make_metadata_fields(), + DataRow.metadata_fields: make_metadata_fields, } ] ) diff --git a/libs/labelbox/tests/integration/test_mmc_data_rows.py b/libs/labelbox/tests/integration/test_mmc_data_rows.py new file mode 100644 index 000000000..ee457a7fe --- /dev/null +++ b/libs/labelbox/tests/integration/test_mmc_data_rows.py @@ -0,0 +1,58 @@ +import json +import random + +import pytest + + +@pytest.fixture +def mmc_data_row(dataset, make_metadata_fields, embedding): + row_data = { + "type": "application/vnd.labelbox.conversational.model-chat-evaluation", + "draft": True, + "rootMessageIds": ["root1"], + "actors": {}, + "messages": {}, + } + + vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] + embeddings = [{"embedding_id": embedding.id, "vector": vector}] + + content_all = { + "row_data": row_data, + "attachments": [{"type": "RAW_TEXT", "value": "attachment value"}], + "metadata_fields": make_metadata_fields, + "embeddings": embeddings, + } + task = dataset.create_data_rows([content_all]) + task.wait_till_done() + assert task.status == "COMPLETE" + + data_row = list(dataset.data_rows())[0] + + yield data_row + + data_row.delete() + + +def test_mmc(mmc_data_row, embedding, constants): + data_row = mmc_data_row + assert json.loads(data_row.row_data) == { + "type": "application/vnd.labelbox.conversational.model-chat-evaluation", + "draft": True, + "rootMessageIds": ["root1"], + "actors": {}, + "messages": {}, + } + + metadata_fields = data_row.metadata_fields + metadata = data_row.metadata + assert len(metadata_fields) == 3 + assert len(metadata) == 3 + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() + + attachments = list(data_row.attachments()) + assert len(attachments) == 1 + + assert 
embedding.get_imported_vector_count() == 1 From 61dc16959c21c8314f33f6d65634959808fe0ad4 Mon Sep 17 00:00:00 2001 From: mnoszczak <99751601+mnoszczak@users.noreply.github.com> Date: Mon, 7 Oct 2024 18:49:11 +0200 Subject: [PATCH 08/15] [PLT-0] Add missing tests (#1855) --- libs/labelbox/tests/data/annotation_import/conftest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py index 2342a759a..001b96771 100644 --- a/libs/labelbox/tests/data/annotation_import/conftest.py +++ b/libs/labelbox/tests/data/annotation_import/conftest.py @@ -2399,6 +2399,10 @@ def expected_export_v2_document(): "height": 65.0, "width": 12.0, }, + "page_dimensions": { + "height": 792.0, + "width": 612.0, + }, }, ], "classifications": [ From 63aba36715b6dd2f6d8c7c6338effeda2e22a5b3 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 8 Oct 2024 12:08:09 -0700 Subject: [PATCH 09/15] [PLT-1614] Support data row / batch for live mmc projects (#1856) --- docs/labelbox/datarow_payload_templates.rst | 6 +++ libs/labelbox/src/labelbox/client.py | 45 +++++++++++++--- .../data/annotation_types/collection.py | 10 ++-- .../schema/data_row_payload_templates.py | 40 ++++++++++++++ libs/labelbox/src/labelbox/schema/project.py | 18 +++---- libs/labelbox/tests/integration/conftest.py | 25 +++++++-- .../test_chat_evaluation_ontology_project.py | 34 +++--------- .../tests/integration/test_data_rows.py | 2 +- .../integration/test_labeling_service.py | 5 +- .../tests/integration/test_mmc_data_rows.py | 54 +++++++++++++------ .../integration/test_project_model_config.py | 14 +++-- .../test_project_set_model_setup_complete.py | 8 +-- 12 files changed, 173 insertions(+), 88 deletions(-) create mode 100644 docs/labelbox/datarow_payload_templates.rst create mode 100644 libs/labelbox/src/labelbox/schema/data_row_payload_templates.py diff --git a/docs/labelbox/datarow_payload_templates.rst b/docs/labelbox/datarow_payload_templates.rst new file mode 100644 index 000000000..34dac6111 --- /dev/null +++ b/docs/labelbox/datarow_payload_templates.rst @@ -0,0 +1,6 @@ +Datarow payload templates +=============================================================================================== + +.. automodule:: labelbox.schema.data_row_payload_templates + :members: + :show-inheritance: \ No newline at end of file diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index aa08ab0b3..93f7b2fa7 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -6,6 +6,7 @@ import random import time import urllib.parse +import warnings from collections import defaultdict from datetime import datetime, timezone from types import MappingProxyType @@ -637,6 +638,7 @@ def create_project( } return self._create_project(_CoreProjectInput(**input)) + @overload def create_model_evaluation_project( self, name: str, @@ -649,7 +651,17 @@ def create_model_evaluation_project( is_consensus_enabled: Optional[bool] = None, dataset_id: Optional[str] = None, dataset_name: Optional[str] = None, - data_row_count: int = 100, + data_row_count: Optional[int] = None, + **kwargs, + ) -> Project: + pass + + def create_model_evaluation_project( + self, + dataset_id: Optional[str] = None, + dataset_name: Optional[str] = None, + data_row_count: Optional[int] = None, + **kwargs, ) -> Project: """ Use this method exclusively to create a chat model evaluation project. 
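# A minimal usage sketch of the reworked signature, assuming a configured
# Client; the project name and dataset id below are illustrative values
# reused from the docstring examples in this patch.
from labelbox import Client

client = Client(api_key="<YOUR_API_KEY>")

# Deprecated path: autogenerate data rows into an existing dataset.
project = client.create_model_evaluation_project(
    name="my-model-evaluation-project",
    dataset_id="clr00u8j0j0j0",
    data_row_count=10,
)

# New path: create the project with no data rows and attach them later
# via project.create_batch(...).
project = client.create_model_evaluation_project(
    name="my-model-evaluation-project"
)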
@@ -674,22 +686,39 @@ def create_model_evaluation_project( >>> client.create_model_evaluation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10) >>> This creates a new project, and adds 100 datarows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created 10 data rows to the project. + >>> client.create_model_evaluation_project(name=project_name) + >>> This creates a new project with no data rows. """ - if not dataset_id and not dataset_name: - raise ValueError( - "dataset_name or data_set_id must be present and not be an empty string." - ) + autogenerate_data_rows = False + dataset_name_or_id = None + append_to_existing_dataset = None + + if dataset_id or dataset_name: + autogenerate_data_rows = True if dataset_id: append_to_existing_dataset = True dataset_name_or_id = dataset_id - else: + elif dataset_name: append_to_existing_dataset = False dataset_name_or_id = dataset_name - media_type = MediaType.Conversational - editor_task_type = EditorTaskType.ModelChatEvaluation + if autogenerate_data_rows: + kwargs["dataset_name_or_id"] = dataset_name_or_id + kwargs["append_to_existing_dataset"] = append_to_existing_dataset + if data_row_count is None: + data_row_count = 100 + if data_row_count < 0: + raise ValueError("data_row_count must be a positive integer.") + kwargs["data_row_count"] = data_row_count + warnings.warn( + "Automatic generation of data rows of live model evaluation projects is deprecated. dataset_name_or_id, append_to_existing_dataset, data_row_count will be removed in a future version.", + DeprecationWarning, + ) + + kwargs["media_type"] = MediaType.Conversational + kwargs["editor_task_type"] = EditorTaskType.ModelChatEvaluation.value input = { "name": name, diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index 51bcce1b2..42d2a1184 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -1,14 +1,10 @@ import logging -from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Callable, Generator, Iterable, Union, Optional -from uuid import uuid4 import warnings +from typing import Callable, Generator, Iterable, Union -from tqdm import tqdm - -from labelbox.schema import ontology from labelbox.orm.model import Entity -from ..ontology import get_classifications, get_tools +from labelbox.schema import ontology + from ..generator import PrefetchGenerator from .label import Label diff --git a/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py new file mode 100644 index 000000000..bf64e055f --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py @@ -0,0 +1,40 @@ +from typing import Dict, List + +from pydantic import BaseModel, Field + +from labelbox.schema.data_row import DataRowMetadataField + + +class ModelEvalutationTemlateRowData(BaseModel): + type: str = Field( + default="application/vnd.labelbox.conversational.model-chat-evaluation", + frozen=True, + ) + draft: bool = Field(default=True, frozen=True) + rootMessageIds: List[str] = Field(default=[]) + actors: Dict = Field(default={}) + version: int = Field(default=2, frozen=True) + messages: Dict = Field(default={}) + + +class ModelEvaluationTemplate(BaseModel): + """ + Use this class to create a model evaluation data row. 
+ + Examples: + >>> data = ModelEvaluationTemplate() + >>> data.row_data.rootMessageIds = ["root1"] + >>> vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] + >>> data.embeddings = [...] + >>> data.metadata_fields = [...] + >>> data.attachments = [...] + >>> content = data.model_dump() + >>> task = dataset.create_data_rows([content]) + """ + + row_data: ModelEvalutationTemlateRowData = Field( + default=ModelEvalutationTemlateRowData() + ) + attachments: List[Dict] = Field(default=[]) + embeddings: List[Dict] = Field(default=[]) + metadata_fields: List[DataRowMetadataField] = Field(default=[]) diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 96b4cc6f8..c30727bfd 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -13,7 +13,6 @@ List, Optional, Tuple, - TypeVar, Union, get_args, ) @@ -23,7 +22,6 @@ InvalidQueryError, LabelboxError, ProcessingWaitTimeout, - ResourceConflict, ResourceNotFoundError, error_message_for_unparsed_graphql_error, ) # type: ignore @@ -59,7 +57,6 @@ from labelbox.schema.model_config import ModelConfig from labelbox.schema.ontology_kind import ( EditorTaskType, - OntologyKind, UploadType, ) from labelbox.schema.project_model_config import ProjectModelConfig @@ -577,7 +574,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - f"This function has only been tested to work with the Editor front end. Found %s", + "This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -745,7 +742,9 @@ def create_batch( lbox.exceptions.ValueError if a project is not batch mode, if the project is auto data generation, if the batch exceeds 100k data rows """ - if self.is_auto_data_generation(): + if ( + self.is_auto_data_generation() and not self.is_chat_evaluation() + ): # NOTE live chat evaluatiuon projects in sdk do not pre-generate data rows, but use batch as all other projects raise ValueError( "Cannot create batches for auto data generation projects" ) @@ -771,7 +770,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - f"Batch exceeds max size, break into smaller batches" + "Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1039,8 +1038,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Batch was not created successfully: " - + json.dumps(task.errors) + "Batch was not created successfully: " + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) @@ -1262,7 +1260,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Priority was not updated successfully: " + "Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1442,7 +1440,7 @@ def move_data_rows_to_task_queue( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Data rows were not moved successfully: " + "Data rows were not moved successfully: " + json.dumps(task.errors) ) diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index 211d767b6..eb23e4ad4 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -631,11 +631,28 @@ def 
chat_evaluation_ontology(client, rand_gen): @pytest.fixture -def live_chat_evaluation_project_with_new_dataset(client, rand_gen): +def live_chat_evaluation_project(client, rand_gen): project_name = f"test-model-evaluation-project-{rand_gen(str)}" - dataset_name = f"test-model-evaluation-dataset-{rand_gen(str)}" - project = client.create_model_evaluation_project( - name=project_name, dataset_name=dataset_name, data_row_count=1 + project = client.create_model_evaluation_project(name=project_name) + + yield project + + project.delete() + + +@pytest.fixture +def live_chat_evaluation_project_with_batch( + client, + rand_gen, + live_chat_evaluation_project, + offline_conversational_data_row, +): + project_name = f"test-model-evaluation-project-{rand_gen(str)}" + project = client.create_model_evaluation_project(name=project_name) + + project.create_batch( + rand_gen(str), + [offline_conversational_data_row.uid], # sample of data row objects ) yield project diff --git a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py index 3e462d677..2c02b77ac 100644 --- a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py +++ b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py @@ -1,5 +1,3 @@ -from unittest.mock import patch - import pytest from labelbox import MediaType @@ -7,9 +5,8 @@ def test_create_chat_evaluation_ontology_project( - client, chat_evaluation_ontology, - live_chat_evaluation_project_with_new_dataset, + live_chat_evaluation_project, offline_conversational_data_row, rand_gen, ): @@ -28,7 +25,7 @@ def test_create_chat_evaluation_ontology_project( assert classification.schema_id assert classification.feature_schema_id - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project assert project.model_setup_complete is None project.connect_ontology(ontology) @@ -36,28 +33,11 @@ def test_create_chat_evaluation_ontology_project( assert project.labeling_frontend().name == "Editor" assert project.ontology().name == ontology.name - with pytest.raises( - ValueError, - match="Cannot create batches for auto data generation projects", - ): - project.create_batch( - rand_gen(str), - [offline_conversational_data_row.uid], # sample of data row objects - ) - - with pytest.raises( - ValueError, - match="Cannot create batches for auto data generation projects", - ): - with patch( - "labelbox.schema.project.MAX_SYNC_BATCH_ROW_COUNT", new=0 - ): # force to async - project.create_batch( - rand_gen(str), - [ - offline_conversational_data_row.uid - ], # sample of data row objects - ) + batch = project.create_batch( + rand_gen(str), + [offline_conversational_data_row.uid], # sample of data row objects + ) + assert batch def test_create_chat_evaluation_ontology_project_existing_dataset( diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index b060d5662..485719575 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -405,7 +405,7 @@ def test_create_data_row_with_metadata_dict( row_data=image_url, metadata_fields=make_metadata_fields_dict ) - assert len(list(dataset.data_rows())) == 1 + assert len([dr for dr in dataset.data_rows()]) == 1 assert data_row.dataset() == dataset assert data_row.created_by() == client.get_user() assert data_row.organization() == client.get_organization() diff --git 
a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py index fecd75518..fc604ac8a 100644 --- a/libs/labelbox/tests/integration/test_labeling_service.py +++ b/libs/labelbox/tests/integration/test_labeling_service.py @@ -43,12 +43,11 @@ def test_request_labeling_service_moe_offline_project( def test_request_labeling_service_moe_project( - rand_gen, - live_chat_evaluation_project_with_new_dataset, + live_chat_evaluation_project_with_batch, chat_evaluation_ontology, model_config, ): - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project_with_batch project.connect_ontology(chat_evaluation_ontology) project.upsert_instructions("tests/integration/media/sample_pdf.pdf") diff --git a/libs/labelbox/tests/integration/test_mmc_data_rows.py b/libs/labelbox/tests/integration/test_mmc_data_rows.py index ee457a7fe..3b4f95530 100644 --- a/libs/labelbox/tests/integration/test_mmc_data_rows.py +++ b/libs/labelbox/tests/integration/test_mmc_data_rows.py @@ -3,26 +3,35 @@ import pytest +from labelbox.schema.data_row_payload_templates import ModelEvaluationTemplate + @pytest.fixture -def mmc_data_row(dataset, make_metadata_fields, embedding): - row_data = { - "type": "application/vnd.labelbox.conversational.model-chat-evaluation", - "draft": True, - "rootMessageIds": ["root1"], - "actors": {}, - "messages": {}, - } +def mmc_data_row(dataset): + data = ModelEvaluationTemplate() + + content_all = data.model_dump() + task = dataset.create_data_rows([content_all]) + task.wait_till_done() + assert task.status == "COMPLETE" + + data_row = list(dataset.data_rows())[0] + + yield data_row + + data_row.delete() + +@pytest.fixture +def mmc_data_row_all(dataset, make_metadata_fields, embedding): + data = ModelEvaluationTemplate() + data.row_data.rootMessageIds = ["root1"] vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] - embeddings = [{"embedding_id": embedding.id, "vector": vector}] + data.embeddings = [{"embedding_id": embedding.id, "vector": vector}] + data.metadata_fields = make_metadata_fields + data.attachments = [{"type": "RAW_TEXT", "value": "attachment value"}] - content_all = { - "row_data": row_data, - "attachments": [{"type": "RAW_TEXT", "value": "attachment value"}], - "metadata_fields": make_metadata_fields, - "embeddings": embeddings, - } + content_all = data.model_dump() task = dataset.create_data_rows([content_all]) task.wait_till_done() assert task.status == "COMPLETE" @@ -34,14 +43,27 @@ def mmc_data_row(dataset, make_metadata_fields, embedding): data_row.delete() -def test_mmc(mmc_data_row, embedding, constants): +def test_mmc(mmc_data_row): data_row = mmc_data_row + assert json.loads(data_row.row_data) == { + "type": "application/vnd.labelbox.conversational.model-chat-evaluation", + "draft": True, + "rootMessageIds": [], + "actors": {}, + "messages": {}, + "version": 2, + } + + +def test_mmc_all(mmc_data_row_all, embedding, constants): + data_row = mmc_data_row_all assert json.loads(data_row.row_data) == { "type": "application/vnd.labelbox.conversational.model-chat-evaluation", "draft": True, "rootMessageIds": ["root1"], "actors": {}, "messages": {}, + "version": 2, } metadata_fields = data_row.metadata_fields diff --git a/libs/labelbox/tests/integration/test_project_model_config.py b/libs/labelbox/tests/integration/test_project_model_config.py index f86bbb38e..f1646dfc0 100644 --- a/libs/labelbox/tests/integration/test_project_model_config.py +++ 
b/libs/labelbox/tests/integration/test_project_model_config.py @@ -2,10 +2,8 @@ from lbox.exceptions import ResourceNotFoundError -def test_add_single_model_config( - live_chat_evaluation_project_with_new_dataset, model_config -): - configured_project = live_chat_evaluation_project_with_new_dataset +def test_add_single_model_config(live_chat_evaluation_project, model_config): + configured_project = live_chat_evaluation_project project_model_config_id = configured_project.add_model_config( model_config.uid ) @@ -22,11 +20,11 @@ def test_add_single_model_config( def test_add_multiple_model_config( client, rand_gen, - live_chat_evaluation_project_with_new_dataset, + live_chat_evaluation_project, model_config, valid_model_id, ): - configured_project = live_chat_evaluation_project_with_new_dataset + configured_project = live_chat_evaluation_project second_model_config = client.create_model_config( rand_gen(str), valid_model_id, {"param": "value"} ) @@ -52,9 +50,9 @@ def test_add_multiple_model_config( def test_delete_project_model_config( - live_chat_evaluation_project_with_new_dataset, model_config + live_chat_evaluation_project, model_config ): - configured_project = live_chat_evaluation_project_with_new_dataset + configured_project = live_chat_evaluation_project assert configured_project.delete_project_model_config( configured_project.add_model_config(model_config.uid) ) diff --git a/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py b/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py index 8872a27f4..30e179028 100644 --- a/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py +++ b/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py @@ -3,9 +3,9 @@ def test_live_chat_evaluation_project( - live_chat_evaluation_project_with_new_dataset, model_config + live_chat_evaluation_project, model_config ): - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project project.set_project_model_setup_complete() assert bool(project.model_setup_complete) is True @@ -18,9 +18,9 @@ def test_live_chat_evaluation_project( def test_live_chat_evaluation_project_delete_cofig( - live_chat_evaluation_project_with_new_dataset, model_config + live_chat_evaluation_project, model_config ): - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project project_model_config_id = project.add_model_config(model_config.uid) assert project_model_config_id From d155c1f50ce459badb1744add8d909bf4f55eeb0 Mon Sep 17 00:00:00 2001 From: sfendell-labelbox <150080555+sfendell-labelbox@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:32:41 -0700 Subject: [PATCH 10/15] Simplify some code. (#1857) --- libs/labelbox/src/labelbox/client.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 93f7b2fa7..109e3ae55 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -690,21 +690,10 @@ def create_model_evaluation_project( >>> This creates a new project with no data rows. 
""" - autogenerate_data_rows = False - dataset_name_or_id = None - append_to_existing_dataset = None + dataset_name_or_id = dataset_id or dataset_name + append_to_existing_dataset = bool(dataset_id) - if dataset_id or dataset_name: - autogenerate_data_rows = True - - if dataset_id: - append_to_existing_dataset = True - dataset_name_or_id = dataset_id - elif dataset_name: - append_to_existing_dataset = False - dataset_name_or_id = dataset_name - - if autogenerate_data_rows: + if dataset_name_or_id: kwargs["dataset_name_or_id"] = dataset_name_or_id kwargs["append_to_existing_dataset"] = append_to_existing_dataset if data_row_count is None: From 9884d54827d4fd026f1032fed5c79bd71d63b057 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 9 Oct 2024 09:00:29 -0700 Subject: [PATCH 11/15] Address QA feedback (#1859) --- .../src/labelbox/schema/data_row_payload_templates.py | 9 +++++---- libs/labelbox/tests/integration/test_mmc_data_rows.py | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py index bf64e055f..2e2728daa 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py +++ b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py @@ -1,11 +1,11 @@ -from typing import Dict, List +from typing import Dict, List, Optional from pydantic import BaseModel, Field from labelbox.schema.data_row import DataRowMetadataField -class ModelEvalutationTemlateRowData(BaseModel): +class ModelEvalutationTemplateRowData(BaseModel): type: str = Field( default="application/vnd.labelbox.conversational.model-chat-evaluation", frozen=True, @@ -15,6 +15,7 @@ class ModelEvalutationTemlateRowData(BaseModel): actors: Dict = Field(default={}) version: int = Field(default=2, frozen=True) messages: Dict = Field(default={}) + global_key: Optional[str] = None class ModelEvaluationTemplate(BaseModel): @@ -32,8 +33,8 @@ class ModelEvaluationTemplate(BaseModel): >>> task = dataset.create_data_rows([content]) """ - row_data: ModelEvalutationTemlateRowData = Field( - default=ModelEvalutationTemlateRowData() + row_data: ModelEvalutationTemplateRowData = Field( + default=ModelEvalutationTemplateRowData() ) attachments: List[Dict] = Field(default=[]) embeddings: List[Dict] = Field(default=[]) diff --git a/libs/labelbox/tests/integration/test_mmc_data_rows.py b/libs/labelbox/tests/integration/test_mmc_data_rows.py index 3b4f95530..2fa7bdd1b 100644 --- a/libs/labelbox/tests/integration/test_mmc_data_rows.py +++ b/libs/labelbox/tests/integration/test_mmc_data_rows.py @@ -10,7 +10,7 @@ def mmc_data_row(dataset): data = ModelEvaluationTemplate() - content_all = data.model_dump() + content_all = data.model_dump(exclude_none=True) task = dataset.create_data_rows([content_all]) task.wait_till_done() assert task.status == "COMPLETE" @@ -26,12 +26,13 @@ def mmc_data_row(dataset): def mmc_data_row_all(dataset, make_metadata_fields, embedding): data = ModelEvaluationTemplate() data.row_data.rootMessageIds = ["root1"] + data.row_data.global_key = "global_key" vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] data.embeddings = [{"embedding_id": embedding.id, "vector": vector}] data.metadata_fields = make_metadata_fields data.attachments = [{"type": "RAW_TEXT", "value": "attachment value"}] - content_all = data.model_dump() + content_all = data.model_dump(exclude_none=True) task = dataset.create_data_rows([content_all]) task.wait_till_done() assert 
task.status == "COMPLETE" @@ -64,6 +65,7 @@ def test_mmc_all(mmc_data_row_all, embedding, constants): "actors": {}, "messages": {}, "version": 2, + "globalKey": "global_key", } metadata_fields = data_row.metadata_fields From 8276fc682253aee6286b45ab803160f2399d6c72 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 9 Oct 2024 10:14:27 -0700 Subject: [PATCH 12/15] Release v.5.2.0 prep (#1860) --- docs/conf.py | 2 +- libs/labelbox/CHANGELOG.md | 6 ++- libs/labelbox/pyproject.toml | 2 +- libs/labelbox/src/labelbox/__init__.py | 67 +++++++++++++++++--------- 4 files changed, 52 insertions(+), 25 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 51648857e..5f1d50567 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ project = 'Python SDK reference' copyright = '2024, Labelbox' author = 'Labelbox' -release = '5.1.0' +release = '5.2.0' # -- General configuration --------------------------------------------------- diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md index 6b23cf6bc..781e90ba4 100644 --- a/libs/labelbox/CHANGELOG.md +++ b/libs/labelbox/CHANGELOG.md @@ -1,6 +1,10 @@ # Changelog -# Version 5.1.0 (2024-09-27) +# Version 5.2.0 (2024-10-09) ## Fixed +* Support data row / batch for live mmc projects([#1856](https://github.com/Labelbox/labelbox-python/pull/1856)) + +# Version 5.1.0 (2024-09-27) +## Added * Support self-signed SSL certs([#1811](https://github.com/Labelbox/labelbox-python/pull/1811)) * Rectangle units now correctly support percent inputs([#1848](https://github.com/Labelbox/labelbox-python/pull/1848)) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index fa64c74f6..d4ffebf4b 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "labelbox" -version = "5.1.0" +version = "5.2.0" description = "Labelbox Python API" authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }] dependencies = [ diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index c5212f194..d01ee8c7e 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -1,46 +1,59 @@ name = "labelbox" -__version__ = "5.1.0" +__version__ = "5.2.0" from labelbox.client import Client -from labelbox.schema.project import Project -from labelbox.schema.model import Model -from labelbox.schema.model_config import ModelConfig -from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( + LabelImport, MALPredictionImport, MEAPredictionImport, - LabelImport, MEAToMALPredictionImport, ) -from labelbox.schema.dataset import Dataset -from labelbox.schema.data_row import DataRow +from labelbox.schema.asset_attachment import AssetAttachment +from labelbox.schema.batch import Batch +from labelbox.schema.benchmark import Benchmark +from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.catalog import Catalog +from labelbox.schema.data_row import DataRow +from labelbox.schema.data_row_metadata import ( + DataRowMetadata, + DataRowMetadataField, + DataRowMetadataOntology, + DeleteDataRowMetadata, +) +from labelbox.schema.dataset import Dataset from labelbox.schema.enums import AnnotationImportState -from labelbox.schema.label import Label -from labelbox.schema.batch import Batch -from labelbox.schema.review import Review -from labelbox.schema.user import User -from labelbox.schema.organization import Organization -from labelbox.schema.task import 
Task from labelbox.schema.export_task import ( - StreamType, + BufferedJsonConverterOutput, ExportTask, BufferedJsonConverterOutput, ) +from labelbox.schema.iam_integration import IAMIntegration +from labelbox.schema.identifiable import GlobalKey, UniqueId +from labelbox.schema.identifiables import DataRowIds, GlobalKeys, UniqueIds +from labelbox.schema.invite import Invite, InviteLimit +from labelbox.schema.label import Label +from labelbox.schema.label_score import LabelScore from labelbox.schema.labeling_frontend import ( LabelingFrontend, LabelingFrontendOptions, ) -from labelbox.schema.asset_attachment import AssetAttachment -from labelbox.schema.webhook import Webhook +from labelbox.schema.labeling_service import LabelingService +from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard +from labelbox.schema.labeling_service_status import LabelingServiceStatus +from labelbox.schema.media_type import MediaType +from labelbox.schema.model import Model +from labelbox.schema.model_config import ModelConfig +from labelbox.schema.model_run import DataSplit, ModelRun from labelbox.schema.ontology import ( + Classification, + FeatureSchema, Ontology, OntologyBuilder, - Classification, Option, + PromptResponseClassification, + ResponseOption, Tool, - FeatureSchema, ) from labelbox.schema.ontology import PromptResponseClassification from labelbox.schema.ontology import ResponseOption @@ -65,10 +78,20 @@ from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds from labelbox.schema.identifiable import UniqueId, GlobalKey from labelbox.schema.ontology_kind import OntologyKind +from labelbox.schema.organization import Organization +from labelbox.schema.project import Project +from labelbox.schema.project_model_config import ProjectModelConfig from labelbox.schema.project_overview import ( ProjectOverview, ProjectOverviewDetailed, ) -from labelbox.schema.labeling_service import LabelingService -from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -from labelbox.schema.labeling_service_status import LabelingServiceStatus +from labelbox.schema.project_resource_tag import ProjectResourceTag +from labelbox.schema.queue_mode import QueueMode +from labelbox.schema.resource_tag import ResourceTag +from labelbox.schema.review import Review +from labelbox.schema.role import ProjectRole, Role +from labelbox.schema.slice import CatalogSlice, ModelSlice, Slice +from labelbox.schema.task import Task +from labelbox.schema.task_queue import TaskQueue +from labelbox.schema.user import User +from labelbox.schema.webhook import Webhook From cb66ac262ed2459ca1c21e9fd640b795db4cacc5 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 9 Oct 2024 17:33:11 -0700 Subject: [PATCH 13/15] Fix encoding for exporter (#1862) --- libs/labelbox/src/labelbox/schema/export_task.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/export_task.py b/libs/labelbox/src/labelbox/schema/export_task.py index b2e1d054f..7e78fc3e9 100644 --- a/libs/labelbox/src/labelbox/schema/export_task.py +++ b/libs/labelbox/src/labelbox/schema/export_task.py @@ -1,9 +1,14 @@ +import json +import os +import tempfile +import warnings from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum from functools import lru_cache -import json from typing import ( + TYPE_CHECKING, + Any, Callable, Generic, Iterator, @@ -11,17 +16,13 @@ Tuple, TypeVar, Union, - TYPE_CHECKING, - Any, ) import requests 
-import tempfile -import os +from pydantic import BaseModel from labelbox.schema.task import Task from labelbox.utils import _CamelCaseMixin -from pydantic import BaseModel if TYPE_CHECKING: from labelbox import Client @@ -120,6 +121,7 @@ def _get_file_content( ) response = requests.get(file_info.file, timeout=30) response.raise_for_status() + response.encoding = "utf-8" assert ( len(response.content) == file_info.offsets.end - file_info.offsets.start + 1 From 836902738aa3924750ca25ef0b74f4587a08b480 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 9 Oct 2024 17:36:50 -0700 Subject: [PATCH 14/15] Release v.5.2.1 prep (#1863) --- docs/conf.py | 2 +- libs/labelbox/CHANGELOG.md | 6 +++++- libs/labelbox/pyproject.toml | 2 +- libs/labelbox/src/labelbox/__init__.py | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 5f1d50567..07656e3a0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ project = 'Python SDK reference' copyright = '2024, Labelbox' author = 'Labelbox' -release = '5.2.0' +release = '5.2.1' # -- General configuration --------------------------------------------------- diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md index 781e90ba4..c6a21580c 100644 --- a/libs/labelbox/CHANGELOG.md +++ b/libs/labelbox/CHANGELOG.md @@ -1,6 +1,10 @@ # Changelog -# Version 5.2.0 (2024-10-09) +# Version 5.2.1 (2024-10-09) ## Fixed +* Exporter encoding + +# Version 5.2.0 (2024-10-09) +## Added * Support data row / batch for live mmc projects([#1856](https://github.com/Labelbox/labelbox-python/pull/1856)) # Version 5.1.0 (2024-09-27) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index d4ffebf4b..06a75ec19 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "labelbox" -version = "5.2.0" +version = "5.2.1" description = "Labelbox Python API" authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }] dependencies = [ diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index d01ee8c7e..9da8edc17 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -1,6 +1,6 @@ name = "labelbox" -__version__ = "5.2.0" +__version__ = "5.2.1" from labelbox.client import Client from labelbox.schema.annotation_import import ( From 87540cd3c9e501b71ef7400f76c4b07b0e4f8077 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Thu, 10 Oct 2024 14:10:14 -0700 Subject: [PATCH 15/15] Fix lint --- libs/labelbox/src/labelbox/schema/__init__.py | 1 - .../labelbox/schema/bulk_import_request.py | 1004 ----------------- libs/labelbox/src/labelbox/schema/project.py | 1 + 3 files changed, 1 insertion(+), 1005 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index 616565931..232741b5d 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,7 +1,6 @@ import labelbox.schema.asset_attachment import labelbox.schema.annotation_import import labelbox.schema.asset_attachment -import labelbox.schema.bulk_import_request import labelbox.schema.annotation_import import labelbox.schema.benchmark import labelbox.schema.data_row diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py deleted file mode 100644 index 
8e11f3261..000000000 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ /dev/null @@ -1,1004 +0,0 @@ -import json -import time -from uuid import UUID, uuid4 -import functools - -import logging -from pathlib import Path -from google.api_core import retry -from labelbox import parser -import requests -from pydantic import ( - ValidationError, - BaseModel, - Field, - field_validator, - model_validator, - ConfigDict, - StringConstraints, -) -from typing_extensions import Literal, Annotated -from typing import ( - Any, - List, - Optional, - BinaryIO, - Dict, - Iterable, - Tuple, - Union, - Type, - Set, - TYPE_CHECKING, -) - -from labelbox import exceptions as lb_exceptions -from labelbox import utils -from labelbox.orm import query -from labelbox.orm.db_object import DbObject -from labelbox.orm.model import Relationship -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.serialization import serialize_labels -from labelbox.orm.model import Field as lb_Field - -if TYPE_CHECKING: - from labelbox import Project - from labelbox.types import Label - -NDJSON_MIME_TYPE = "application/x-ndjson" -logger = logging.getLogger(__name__) - -# TODO: Deprecate this library in place of labelimport and malprediction import library. - - -def _determinants(parent_cls: Any) -> List[str]: - return [ - k - for k, v in parent_cls.model_fields.items() - if v.json_schema_extra and "determinant" in v.json_schema_extra - ] - - -def _make_file_name(project_id: str, name: str) -> str: - return f"{project_id}__{name}.ndjson" - - -# TODO(gszpak): move it to client.py -def _make_request_data( - project_id: str, name: str, content_length: int, file_name: str -) -> dict: - query_str = """mutation createBulkImportRequestFromFilePyApi( - $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - filePayload: { - file: $file, - contentLength: $contentLength - } - }) { - %s - } - } - """ % query.results_query_part(BulkImportRequest) - variables = { - "projectId": project_id, - "name": name, - "file": None, - "contentLength": content_length, - } - operations = json.dumps({"variables": variables, "query": query_str}) - - return { - "operations": operations, - "map": (None, json.dumps({file_name: ["variables.file"]})), - } - - -def _send_create_file_command( - client, - request_data: dict, - file_name: str, - file_data: Tuple[str, Union[bytes, BinaryIO], str], -) -> dict: - response = client.execute(data=request_data, files={file_name: file_data}) - - if not response.get("createBulkImportRequest", None): - raise lb_exceptions.LabelboxError( - "Failed to create BulkImportRequest, message: %s" - % response.get("errors", None) - or response.get("error", None) - ) - - return response - - -class BulkImportRequest(DbObject): - """Represents the import job when importing annotations. 
- - Attributes: - name (str) - state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) - input_file_url (str): URL to your web-hosted NDJSON file - error_file_url (str): NDJSON that contains error messages for failed annotations - status_file_url (str): NDJSON that contains status for each annotation - created_at (datetime): UTC timestamp for date BulkImportRequest was created - - project (Relationship): `ToOne` relationship to Project - created_by (Relationship): `ToOne` relationship to User - """ - - name = lb_Field.String("name") - state = lb_Field.Enum(BulkImportRequestState, "state") - input_file_url = lb_Field.String("input_file_url") - error_file_url = lb_Field.String("error_file_url") - status_file_url = lb_Field.String("status_file_url") - created_at = lb_Field.DateTime("created_at") - - project = Relationship.ToOne("Project") - created_by = Relationship.ToOne("User", False, "created_by") - - @property - def inputs(self) -> List[Dict[str, Any]]: - """ - Inputs for each individual annotation uploaded. - This should match the ndjson annotations that you have uploaded. - - Returns: - Uploaded ndjson. - - * This information will expire after 24 hours. - """ - return self._fetch_remote_ndjson(self.input_file_url) - - @property - def errors(self) -> List[Dict[str, Any]]: - """ - Errors for each individual annotation uploaded. This is a subset of statuses - - Returns: - List of dicts containing error messages. Empty list means there were no errors - See `BulkImportRequest.statuses` for more details. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.error_file_url) - - @property - def statuses(self) -> List[Dict[str, Any]]: - """ - Status for each individual annotation uploaded. - - Returns: - A status for each annotation if the upload is done running. - See below table for more details - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - Field - - Description - * - uuid - - Specifies the annotation for the status row. - * - dataRow - - JSON object containing the Labelbox data row ID for the annotation. - * - status - - Indicates SUCCESS or FAILURE. - * - errors - - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.status_file_url) - - @functools.lru_cache() - def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: - """ - Fetches the remote ndjson file and caches the results. - - Args: - url (str): Can be any url pointing to an ndjson file. - Returns: - ndjson as a list of dicts. - """ - response = requests.get(url) - response.raise_for_status() - return parser.loads(response.text) - - def refresh(self) -> None: - """Synchronizes values of all fields with the database.""" - query_str, params = query.get_single(BulkImportRequest, self.uid) - res = self.client.execute(query_str, params) - res = res[utils.camel_case(BulkImportRequest.type_name())] - self._set_field_values(res) - - def wait_till_done(self, sleep_time_seconds: int = 5) -> None: - self.wait_until_done(sleep_time_seconds) - - def wait_until_done(self, sleep_time_seconds: int = 5) -> None: - """Blocks import job until certain conditions are met. 
- - Blocks until the BulkImportRequest.state changes either to - `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, - periodically refreshing object's state. - - Args: - sleep_time_seconds (str): a time to block between subsequent API calls - """ - while self.state == BulkImportRequestState.RUNNING: - logger.info(f"Sleeping for {sleep_time_seconds} seconds...") - time.sleep(sleep_time_seconds) - self.__exponential_backoff_refresh() - - @retry.Retry( - predicate=retry.if_exception_type( - lb_exceptions.ApiLimitError, - lb_exceptions.TimeoutError, - lb_exceptions.NetworkError, - ) - ) - def __exponential_backoff_refresh(self) -> None: - self.refresh() - - @classmethod - def from_name( - cls, client, project_id: str, name: str - ) -> "BulkImportRequest": - """Fetches existing BulkImportRequest. - - Args: - client (Client): a Labelbox client - project_id (str): BulkImportRequest's project id - name (str): name of BulkImportRequest - Returns: - BulkImportRequest object - - """ - query_str = """query getBulkImportRequestPyApi( - $projectId: ID!, $name: String!) { - bulkImportRequest(where: { - projectId: $projectId, - name: $name - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name} - response = client.execute(query_str, params=params) - return cls(client, response["bulkImportRequest"]) - - @classmethod - def create_from_url( - cls, client, project_id: str, name: str, url: str, validate=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a publicly accessible URL - to an ndjson file with predictions. - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - url (str): publicly accessible URL pointing to ndjson file containing predictions - validate (bool): a flag indicating if there should be a validation - if `url` is valid ndjson - Returns: - BulkImportRequest object - """ - if validate: - logger.warn( - "Validation is turned on. The file will be downloaded locally and processed before uploading." - ) - res = requests.get(url) - data = parser.loads(res.text) - _validate_ndjson(data, client.get_project(project_id)) - - query_str = """mutation createBulkImportRequestPyApi( - $projectId: ID!, $name: String!, $fileUrl: String!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - fileUrl: $fileUrl - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name, "fileUrl": url} - bulk_import_request_response = client.execute(query_str, params=params) - return cls( - client, bulk_import_request_response["createBulkImportRequest"] - ) - - @classmethod - def create_from_objects( - cls, - client, - project_id: str, - name: str, - predictions: Union[Iterable[Dict], Iterable["Label"]], - validate=True, - ) -> "BulkImportRequest": - """ - Creates a `BulkImportRequest` from an iterable of dictionaries. 
- - Conforms to JSON predictions format, e.g.: - ``{ - "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", - "schemaId": "ckappz7d700gn0zbocmqkwd9i", - "dataRow": { - "id": "ck1s02fqxm8fi0757f0e6qtdc" - }, - "bbox": { - "top": 48, - "left": 58, - "height": 865, - "width": 1512 - } - }`` - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - predictions (Iterable[dict]): iterable of dictionaries representing predictions - validate (bool): a flag indicating if there should be a validation - if `predictions` is valid ndjson - Returns: - BulkImportRequest object - """ - if not isinstance(predictions, list): - raise TypeError( - f"annotations must be in a form of Iterable. Found {type(predictions)}" - ) - ndjson_predictions = serialize_labels(predictions) - - if validate: - _validate_ndjson(ndjson_predictions, client.get_project(project_id)) - - data_str = parser.dumps(ndjson_predictions) - if not data_str: - raise ValueError("annotations cannot be empty") - - data = data_str.encode("utf-8") - file_name = _make_file_name(project_id, name) - request_data = _make_request_data( - project_id, name, len(data_str), file_name - ) - file_data = (file_name, data, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, - request_data=request_data, - file_name=file_name, - file_data=file_data, - ) - - return cls(client, response_data["createBulkImportRequest"]) - - @classmethod - def create_from_local_file( - cls, client, project_id: str, name: str, file: Path, validate_file=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a local ndjson file with predictions. - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - file (Path): local ndjson file with predictions - validate_file (bool): a flag indicating if there should be a validation - if `file` is a valid ndjson file - Returns: - BulkImportRequest object - - """ - file_name = _make_file_name(project_id, name) - content_length = file.stat().st_size - request_data = _make_request_data( - project_id, name, content_length, file_name - ) - - with file.open("rb") as f: - if validate_file: - reader = parser.reader(f) - # ensure that the underlying json load call is valid - # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53 - # by iterating through the file so we only store - # each line in memory rather than the entire file - try: - _validate_ndjson(reader, client.get_project(project_id)) - except ValueError: - raise ValueError(f"{file} is not a valid ndjson file") - else: - f.seek(0) - file_data = (file.name, f, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, request_data, file_name, file_data - ) - return cls(client, response_data["createBulkImportRequest"]) - - def delete(self) -> None: - """Deletes the import job and also any annotations created by this import. - - Returns: - None - """ - id_param = "bulk_request_id" - query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) { - deleteBulkImportRequest(where: {id: $%s}) { - id - name - } - }""" % (id_param, id_param) - self.client.execute(query_str, {id_param: self.uid}) - - -def _validate_ndjson( - lines: Iterable[Dict[str, Any]], project: "Project" -) -> None: - """ - Client side validation of an ndjson object. 
- - Does not guarentee that an upload will succeed for the following reasons: - * We are not checking the data row types which will cause the following errors to slip through - * Missing frame indices will not causes an error for videos - * Uploaded annotations for the wrong data type will pass (Eg. entity on images) - * We are not checking bounds of an asset (Eg. frame index, image height, text location) - - Args: - lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines - project (Project): id of project for which predictions will be imported - - Raises: - MALValidationError: Raise for invalid NDJson - UuidError: Duplicate UUID in upload - """ - feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas( - project.ontology() - ) - uids: Set[str] = set() - for idx, line in enumerate(lines): - try: - annotation = NDAnnotation(**line) - annotation.validate_instance( - feature_schemas_by_id, feature_schemas_by_name - ) - uuid = str(annotation.uuid) - if uuid in uids: - raise lb_exceptions.UuidError( - f"{uuid} already used in this import job, " - "must be unique for the project." - ) - uids.add(uuid) - except (ValidationError, ValueError, TypeError, KeyError) as e: - raise lb_exceptions.MALValidationError( - f"Invalid NDJson on line {idx}" - ) from e - - -# The rest of this file contains objects for MAL validation -def parse_classification(tool): - """ - Parses a classification from an ontology. Only radio, checklist, and text are supported for mal - - Args: - tool (dict) - - Returns: - dict - """ - if tool["type"] in ["radio", "checklist"]: - option_schema_ids = [r["featureSchemaId"] for r in tool["options"]] - option_names = [r["value"] for r in tool["options"]] - return { - "tool": tool["type"], - "featureSchemaId": tool["featureSchemaId"], - "name": tool["name"], - "options": [*option_schema_ids, *option_names], - } - elif tool["type"] == "text": - return { - "tool": tool["type"], - "name": tool["name"], - "featureSchemaId": tool["featureSchemaId"], - } - - -def get_mal_schemas(ontology): - """ - Converts a project ontology to a dict for easier lookup during ndjson validation - - Args: - ontology (Ontology) - Returns: - Dict, Dict : Useful for looking up a tool from a given feature schema id or name - """ - - valid_feature_schemas_by_schema_id = {} - valid_feature_schemas_by_name = {} - for tool in ontology.normalized["tools"]: - classifications = [ - parse_classification(classification_tool) - for classification_tool in tool["classifications"] - ] - classifications_by_schema_id = { - v["featureSchemaId"]: v for v in classifications - } - classifications_by_name = {v["name"]: v for v in classifications} - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - valid_feature_schemas_by_name[tool["name"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - for tool in ontology.normalized["classifications"]: - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = ( - parse_classification(tool) - ) - valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool) - return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name - - -class Bbox(BaseModel): - top: float - left: float - height: float - width: float - - -class Point(BaseModel): - x: float - y: float - - 
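# A minimal sketch (not part of the deleted module) showing how these small
# geometry models validate payload values; the bbox numbers are the
# illustrative ones from the create_from_objects docstring above.
bbox = Bbox(top=48, left=58, height=865, width=1512)
point = Point(x=58.0, y=48.0)
assert bbox.height == 865.0 and point.x == 58.0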
-class FrameLocation(BaseModel): - end: int - start: int - - -class VideoSupported(BaseModel): - # Note that frames are only allowed as top level inferences for video - frames: Optional[List[FrameLocation]] = None - - -# Base class for a special kind of union. -class SpecialUnion: - def __new__(cls, **kwargs): - return cls.build(kwargs) - - @classmethod - def __get_validators__(cls): - yield cls.build - - @classmethod - def get_union_types(cls): - if not issubclass(cls, SpecialUnion): - raise TypeError("{} must be a subclass of SpecialUnion") - - union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")] - if len(union_types) < 1: - raise TypeError( - "Class {cls} should inherit from a union of objects to build" - ) - if len(union_types) > 1: - raise TypeError( - f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}" - ) - return union_types[0].__args__[0].__args__ - - @classmethod - def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase": - """ - Checks through all objects in the union to see which matches the input data. - Args: - data (Union[dict, BaseModel]) : The data for constructing one of the objects in the union - raises: - KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion - ValidationError: Error while trying to construct a specific object in the union - - """ - if isinstance(data, BaseModel): - data = data.model_dump() - - top_level_fields = [] - max_match = 0 - matched = None - - for type_ in cls.get_union_types(): - determinate_fields = _determinants(type_) - top_level_fields.append(determinate_fields) - matches = sum([val in determinate_fields for val in data]) - if matches == len(determinate_fields) and matches > max_match: - max_match = matches - matched = type_ - - if matched is not None: - # These two have the exact same top level keys - if matched in [NDRadio, NDText]: - if isinstance(data["answer"], dict): - matched = NDRadio - elif isinstance(data["answer"], str): - matched = NDText - else: - raise TypeError( - f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict" - ) - return matched(**data) - else: - raise KeyError( - f"Invalid annotation. Must have one of the following keys : {top_level_fields}. Found {data}." - ) - - @classmethod - def schema(cls): - results = {"definitions": {}} - for cl in cls.get_union_types(): - schema = cl.schema() - results["definitions"].update(schema.pop("definitions")) - results[cl.__name__] = schema - return results - - -class DataRow(BaseModel): - id: str - - -class NDFeatureSchema(BaseModel): - schemaId: Optional[str] = None - name: Optional[str] = None - - @model_validator(mode="after") - def most_set_one(self): - if self.schemaId is None and self.name is None: - raise ValueError( - "Must set either schemaId or name for all feature schemas" - ) - return self - - -class NDBase(NDFeatureSchema): - ontology_type: str - uuid: UUID - dataRow: DataRow - model_config = ConfigDict(extra="forbid") - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - if self.name: - if self.name not in valid_feature_schemas_by_name: - raise ValueError( - f"Name {self.name} is not valid for the provided project's ontology." 
-
-
-class NDBase(NDFeatureSchema):
-    ontology_type: str
-    uuid: UUID
-    dataRow: DataRow
-    model_config = ConfigDict(extra="forbid")
-
-    def validate_feature_schemas(
-        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
-    ):
-        if self.name:
-            if self.name not in valid_feature_schemas_by_name:
-                raise ValueError(
-                    f"Name {self.name} is not valid for the provided project's ontology."
-                )
-
-            if (
-                self.ontology_type
-                != valid_feature_schemas_by_name[self.name]["tool"]
-            ):
-                raise ValueError(
-                    f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}"
-                )
-
-        if self.schemaId:
-            if self.schemaId not in valid_feature_schemas_by_id:
-                raise ValueError(
-                    f"Schema id {self.schemaId} is not valid for the provided project's ontology."
-                )
-
-            if (
-                self.ontology_type
-                != valid_feature_schemas_by_id[self.schemaId]["tool"]
-            ):
-                raise ValueError(
-                    f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}"
-                )
-
-    def validate_instance(
-        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
-    ):
-        self.validate_feature_schemas(
-            valid_feature_schemas_by_id, valid_feature_schemas_by_name
-        )
-
-
-###### Classifications ######
-
-
-class NDText(NDBase):
-    ontology_type: Literal["text"] = "text"
-    answer: str = Field(json_schema_extra={"determinant": True})
-    # No feature schema to check
-
-
-class NDChecklist(VideoSupported, NDBase):
-    ontology_type: Literal["checklist"] = "checklist"
-    answers: List[NDFeatureSchema] = Field(
-        json_schema_extra={"determinant": True}
-    )
-
-    @field_validator("answers", mode="before")
-    def validate_answers(cls, value, field):
-        # constr not working with mypy.
-        if not len(value):
-            raise ValueError("Checklist answers should not be empty")
-        return value
-
-    def validate_feature_schemas(
-        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
-    ):
-        # Test the top-level feature schema for this tool
-        super(NDChecklist, self).validate_feature_schemas(
-            valid_feature_schemas_by_id, valid_feature_schemas_by_name
-        )
-        # Test the feature schemas provided to the answers field
-        if len(
-            set([answer.name or answer.schemaId for answer in self.answers])
-        ) != len(self.answers):
-            raise ValueError(
-                f"Duplicate featureSchema found for checklist {self.uuid}"
-            )
-        for answer in self.answers:
-            options = (
-                valid_feature_schemas_by_name[self.name]["options"]
-                if self.name
-                else valid_feature_schemas_by_id[self.schemaId]["options"]
-            )
-            if answer.name not in options and answer.schemaId not in options:
-                raise ValueError(
-                    f"Feature schema provided to {self.ontology_type} is invalid. Expected one of {options}. Found {answer}"
-                )
-
-
-class NDRadio(VideoSupported, NDBase):
-    ontology_type: Literal["radio"] = "radio"
-    answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True})
-
-    def validate_feature_schemas(
-        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
-    ):
-        super(NDRadio, self).validate_feature_schemas(
-            valid_feature_schemas_by_id, valid_feature_schemas_by_name
-        )
-        options = (
-            valid_feature_schemas_by_name[self.name]["options"]
-            if self.name
-            else valid_feature_schemas_by_id[self.schemaId]["options"]
-        )
-        if (
-            self.answer.name not in options
-            and self.answer.schemaId not in options
-        ):
-            raise ValueError(
-                f"Feature schema provided to {self.ontology_type} is invalid. Expected one of {options}. Found {self.answer.name or self.answer.schemaId}"
-            )
-
-
-# A union with custom construction logic to improve error messages
-class NDClassification(
-    SpecialUnion,
-    Type[Union[NDText, NDRadio, NDChecklist]],  # type: ignore
-): ...
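
Because NDText and NDRadio share the "answer" determinant field, build falls back to the runtime type of the answer. A hypothetical payload (the uuid and data row id are invented) showing both paths:

    radio_line = {
        "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",  # hypothetical
        "dataRow": {"id": "fake-data-row-id"},  # hypothetical
        "name": "color",
        "answer": {"name": "red"},
    }
    # A dict answer selects NDRadio ...
    assert isinstance(NDClassification(**radio_line), NDRadio)
    # ... while a plain string answer selects NDText.
    assert isinstance(NDClassification(**{**radio_line, "answer": "red"}), NDText)
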
-
-
-###### Tools ######
-
-
-class NDBaseTool(NDBase):
-    classifications: List[NDClassification] = []
-
-    # This is independent of our problem
-    def validate_feature_schemas(
-        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
-    ):
-        super(NDBaseTool, self).validate_feature_schemas(
-            valid_feature_schemas_by_id, valid_feature_schemas_by_name
-        )
-        for classification in self.classifications:
-            classification.validate_feature_schemas(
-                valid_feature_schemas_by_name[self.name][
-                    "classificationsBySchemaId"
-                ]
-                if self.name
-                else valid_feature_schemas_by_id[self.schemaId][
-                    "classificationsBySchemaId"
-                ],
-                valid_feature_schemas_by_name[self.name][
-                    "classificationsByName"
-                ]
-                if self.name
-                else valid_feature_schemas_by_id[self.schemaId][
-                    "classificationsByName"
-                ],
-            )
-
-    @field_validator("classifications", mode="before")
-    def validate_subclasses(cls, value, field):
-        # Create a uuid and data row id so we don't have to define classification objects twice.
-        # These ids are required for top-level classifications but not for subclasses.
-        results = []
-        dummy_id = "child".center(25, "_")
-        for row in value:
-            results.append(
-                {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())}
-            )
-        return results
-
-
-class NDPolygon(NDBaseTool):
-    ontology_type: Literal["polygon"] = "polygon"
-    polygon: List[Point] = Field(json_schema_extra={"determinant": True})
-
-    @field_validator("polygon")
-    def is_geom_valid(cls, v):
-        if len(v) < 3:
-            raise ValueError(
-                f"A polygon must have at least 3 points to be valid. Found {v}"
-            )
-        return v
-
-
-class NDPolyline(NDBaseTool):
-    ontology_type: Literal["line"] = "line"
-    line: List[Point] = Field(json_schema_extra={"determinant": True})
-
-    @field_validator("line")
-    def is_geom_valid(cls, v):
-        if len(v) < 2:
-            raise ValueError(
-                f"A line must have at least 2 points to be valid. Found {v}"
-            )
-        return v
-
-
-class NDRectangle(NDBaseTool):
-    ontology_type: Literal["rectangle"] = "rectangle"
-    bbox: Bbox = Field(json_schema_extra={"determinant": True})
-    # Could check if points are positive
-
-
-class NDPoint(NDBaseTool):
-    ontology_type: Literal["point"] = "point"
-    point: Point = Field(json_schema_extra={"determinant": True})
-    # Could check if points are positive
-
-
-class EntityLocation(BaseModel):
-    start: int
-    end: int
-
-
-class NDTextEntity(NDBaseTool):
-    ontology_type: Literal["named-entity"] = "named-entity"
-    location: EntityLocation = Field(json_schema_extra={"determinant": True})
-
-    @field_validator("location")
-    def is_valid_location(cls, v):
-        if isinstance(v, BaseModel):
-            v = v.model_dump()
-
-        if len(v) < 2:
-            raise ValueError(
-                f"A text entity location must include both a start and an end. Found {v}"
-            )
-        if v["start"] < 0:
-            raise ValueError(f"Text location start must be non-negative. Found {v}")
-        if v["start"] > v["end"]:
-            raise ValueError(
-                f"Text start location must be less than or equal to the end. Found {v}"
-            )
-        return v
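
A hypothetical named-entity line (ids invented) that passes the offset checks above; reversing the offsets trips is_valid_location:

    entity_line = {
        "uuid": "c9e19e73-0c2a-4b17-a2b1-3a0b1e1a2f11",  # hypothetical
        "dataRow": {"id": "fake-data-row-id"},  # hypothetical
        "name": "person",
        "location": {"start": 67, "end": 128},  # character offsets into the text asset
    }
    NDTextEntity(**entity_line)  # validates cleanly
    # NDTextEntity(**{**entity_line, "location": {"start": 128, "end": 67}})
    # would raise: "Text start location must be less than or equal to the end. ..."
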
-
-
-class RLEMaskFeatures(BaseModel):
-    counts: List[int]
-    size: List[int]
-
-    @field_validator("counts")
-    def validate_counts(cls, counts):
-        if not all([count >= 0 for count in counts]):
-            raise ValueError(
-                "Found a negative value in counts. They should all be zero or positive"
-            )
-        return counts
-
-    @field_validator("size")
-    def validate_size(cls, size):
-        if len(size) != 2:
-            raise ValueError(
-                f"Mask `size` should have two ints representing height and width. Found: {size}"
-            )
-        if not all([count > 0 for count in size]):
-            raise ValueError(
-                f"Mask `size` values should be positive ints. Found: {size}"
-            )
-        return size
-
-
-class PNGMaskFeatures(BaseModel):
-    # base64-encoded PNG bytes
-    png: str
-
-
-class URIMaskFeatures(BaseModel):
-    instanceURI: str
-    colorRGB: Union[List[int], Tuple[int, int, int]]
-
-    @field_validator("colorRGB")
-    def validate_color(cls, colorRGB):
-        # Does the dtype matter? Can it be a float?
-        if not isinstance(colorRGB, (tuple, list)):
-            raise ValueError(
-                f"Received color that is not a list or tuple. Found: {colorRGB}"
-            )
-        elif len(colorRGB) != 3:
-            raise ValueError(
-                f"Must provide RGB values for segmentation colors. Found: {colorRGB}"
-            )
-        elif not all([0 <= color <= 255 for color in colorRGB]):
-            raise ValueError(
-                f"All RGB values must be between 0 and 255. Found: {colorRGB}"
-            )
-        return colorRGB
-
-
-class NDMask(NDBaseTool):
-    ontology_type: Literal["superpixel"] = "superpixel"
-    mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field(
-        json_schema_extra={"determinant": True}
-    )
-
-
-# A union with custom construction logic to improve error messages
-class NDTool(
-    SpecialUnion,
-    Type[  # type: ignore
-        Union[
-            NDMask,
-            NDTextEntity,
-            NDPoint,
-            NDRectangle,
-            NDPolyline,
-            NDPolygon,
-        ]
-    ],
-): ...
-
-
-class NDAnnotation(
-    SpecialUnion,
-    Type[Union[NDTool, NDClassification]],  # type: ignore
-):
-    @classmethod
-    def build(cls: Any, data) -> "NDBase":
-        if not isinstance(data, dict):
-            raise ValueError("value must be a dict")
-        errors = []
-        for cl in cls.get_union_types():
-            try:
-                return cl(**data)
-            except KeyError as e:
-                errors.append(f"{cl.__name__}: {e}")
-
-        raise ValueError(
-            "Unable to construct any annotation.\n{}".format("\n".join(errors))
-        )
-
-    @classmethod
-    def schema(cls):
-        data = {"definitions": {}}
-        for type_ in cls.get_union_types():
-            schema_ = type_.schema()
-            data["definitions"].update(schema_.pop("definitions"))
-            data[type_.__name__] = schema_
-        return data
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py
index c30727bfd..a47e44e25 100644
--- a/libs/labelbox/src/labelbox/schema/project.py
+++ b/libs/labelbox/src/labelbox/schema/project.py
@@ -4,6 +4,7 @@ import warnings
 from collections import namedtuple
 from datetime import datetime, timezone
+from pathlib import Path
 from string import Template
 from typing import (
     TYPE_CHECKING,