From 0bbd7c290ac4aa55bc5a373b3b63fa77c68dc39f Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 16 Sep 2024 18:05:39 -0700 Subject: [PATCH 01/35] Vb/fix ontology leaks plt 1379 (#1814) --- .../labelbox/schema/bulk_import_request.py | 8 +- .../schema/labeling_service_dashboard.py | 38 ++-- libs/labelbox/tests/conftest.py | 191 +++++++++++++----- .../tests/data/annotation_import/conftest.py | 20 +- .../data/annotation_import/test_model_run.py | 20 +- libs/labelbox/tests/data/export/conftest.py | 11 +- .../tests/data/test_data_row_metadata.py | 15 -- libs/labelbox/tests/integration/conftest.py | 4 +- .../tests/integration/test_feature_schema.py | 18 +- .../unit/test_labeling_service_dashboard.py | 102 +++++----- 10 files changed, 260 insertions(+), 167 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py index 44ac7cd6a..8e11f3261 100644 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ b/libs/labelbox/src/labelbox/schema/bulk_import_request.py @@ -787,9 +787,7 @@ def validate_feature_schemas( # A union with custom construction logic to improve error messages class NDClassification( SpecialUnion, - Type[ # type: ignore - Union[NDText, NDRadio, NDChecklist] - ], + Type[Union[NDText, NDRadio, NDChecklist]], # type: ignore ): ... @@ -979,9 +977,7 @@ class NDTool( class NDAnnotation( SpecialUnion, - Type[ # type: ignore - Union[NDTool, NDClassification] - ], + Type[Union[NDTool, NDClassification]], # type: ignore ): @classmethod def build(cls: Any, data) -> "NDBase": diff --git a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py index 2052897f6..c5e1fa11e 100644 --- a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py +++ b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py @@ -84,7 +84,8 @@ def __init__(self, **kwargs): super().__init__(**kwargs) if not self.client.enable_experimental: raise RuntimeError( - "Please enable experimental in client to use LabelingService") + "Please enable experimental in client to use LabelingService" + ) @property def service_type(self): @@ -97,20 +98,28 @@ def service_type(self): if self.editor_task_type is None: return sentence_case(self.media_type.value) - if (self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation - and self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Offline chat evaluation" - if (self.editor_task_type == EditorTaskType.ModelChatEvaluation and - self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.ModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Live chat evaluation" - if (self.editor_task_type == EditorTaskType.ResponseCreation and - self.media_type == MediaType.Text): + if ( + self.editor_task_type == EditorTaskType.ResponseCreation + and self.media_type == MediaType.Text + ): return "Response creation" - if (self.media_type == MediaType.LLMPromptCreation or - self.media_type == MediaType.LLMPromptResponseCreation): + if ( + self.media_type == MediaType.LLMPromptCreation + or self.media_type == MediaType.LLMPromptResponseCreation + ): return "Prompt response creation" return sentence_case(self.media_type.value) @@ -154,7 +163,8 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) else: template = 
Template( """query SearchProjectsPyApi($$first: Int, $$from: String) { @@ -164,11 +174,13 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) query_str = template.substitute( labeling_dashboard_selections=GRAPHQL_QUERY_SELECTIONS, search_query=build_search_filter(search_query) - if search_query else None, + if search_query + else None, ) params: Dict[str, Union[str, int]] = {} @@ -186,7 +198,7 @@ def convert_to_labeling_service_dashboard(client, data): experimental=True, ) - @model_validator(mode='before') + @model_validator(mode="before") def convert_boost_data(cls, data): if "boostStatus" in data: data["status"] = LabelingServiceStatus(data.pop("boostStatus")) diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py index 446db396b..6d13a8d83 100644 --- a/libs/labelbox/tests/conftest.py +++ b/libs/labelbox/tests/conftest.py @@ -7,7 +7,9 @@ import re import uuid import time +from labelbox.schema.project import Project import requests +from labelbox.schema.ontology import Ontology import pytest from types import SimpleNamespace from typing import Type @@ -23,21 +25,11 @@ from labelbox.schema.queue_mode import QueueMode from labelbox import Client -from labelbox import Dataset, DataRow from labelbox import LabelingFrontend -from labelbox import OntologyBuilder, Tool, Option, Classification, MediaType -from labelbox.orm import query -from labelbox.pagination import PaginatedCollection +from labelbox import OntologyBuilder, Tool, Option, Classification from labelbox.schema.annotation_import import LabelImport -from labelbox.schema.catalog import Catalog from labelbox.schema.enums import AnnotationImportState -from labelbox.schema.invite import Invite -from labelbox.schema.quality_mode import QualityMode -from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.user import User from labelbox.exceptions import LabelboxError -from contextlib import suppress -from labelbox import Client IMG_URL = "https://picsum.photos/200/300.jpg" MASKABLE_IMG_URL = "https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg" @@ -638,17 +630,22 @@ def organization(client): def configured_project_with_label( client, rand_gen, - image_url, - project, dataset, data_row, wait_for_label_processing, + teardown_helpers, ): """Project with a connected dataset, having one datarow + Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra labels One label is already created and yielded when using fixture """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) project._wait_until_data_rows_are_processed( data_row_ids=[data_row.uid], wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS, @@ -666,8 +663,7 @@ def configured_project_with_label( ) yield [project, dataset, data_row, label] - for label in project.labels(): - label.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) def _create_label(project, data_row, ontology, wait_for_label_processing): @@ -736,13 +732,23 @@ def big_dataset(dataset: Dataset): @pytest.fixture def configured_batch_project_with_label( - project, dataset, data_row, wait_for_label_processing + client, + dataset, + data_row, + wait_for_label_processing, + rand_gen, + teardown_helpers, ): """Project with a batch having one datarow Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra 
labels. One label is already created and yielded when using the fixture
    """
+    project = client.create_project(
+        name=rand_gen(str),
+        queue_mode=QueueMode.Batch,
+        media_type=MediaType.Image,
+    )
    data_rows = [dr.uid for dr in list(dataset.data_rows())]
    project._wait_until_data_rows_are_processed(
        data_row_ids=data_rows, sleep_interval=3
    )

    yield [project, dataset, data_row, label]

-    for label in project.labels():
-        label.delete()
+    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)


 @pytest.fixture
 def configured_batch_project_with_multiple_datarows(
-    project, dataset, data_rows, wait_for_label_processing
+    client,
+    dataset,
+    data_rows,
+    wait_for_label_processing,
+    rand_gen,
+    teardown_helpers,
 ):
     """Project with a batch having multiple datarows
     Project contains an ontology with 1 bbox tool
     Additionally includes a create_label method for any needed extra labels
     """
+    project = client.create_project(
+        name=rand_gen(str),
+        queue_mode=QueueMode.Batch,
+        media_type=MediaType.Image,
+    )
     global_keys = [dr.global_key for dr in data_rows]

     batch_name = f"batch {uuid.uuid4()}"

     yield [project, dataset, data_rows]

-    for label in project.labels():
-        label.delete()
-
-
-@pytest.fixture
-def configured_batch_project_for_labeling_service(
-    project, data_row_and_global_key
-):
-    """Project with a batch having multiple datarows
-    Project contains an ontology with 1 bbox tool
-    Additionally includes a create_label method for any needed extra labels
-    """
-    global_keys = [data_row_and_global_key[1]]
-
-    batch_name = f"batch {uuid.uuid4()}"
-    project.create_batch(batch_name, global_keys=global_keys)
-
-    _setup_ontology(project)
-
-    yield project
+    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)


 # NOTE: this is a nice heuristic; Project also implements similar logic in _wait_until_data_rows_are_processed
def project_with_empty_ontology(project):
 @pytest.fixture
 def configured_project_with_complex_ontology(
-    client, initial_dataset, rand_gen, image_url
+    client, initial_dataset, rand_gen, image_url, teardown_helpers
 ):
     project = client.create_project(
         name=rand_gen(str),
     project.setup(editor, ontology.asdict())

     yield [project, data_row]
-    project.delete()
+    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)


 @pytest.fixture
def valid_model_id():
 @pytest.fixture
 def requested_labeling_service(
-    rand_gen,
-    live_chat_evaluation_project_with_new_dataset,
-    chat_evaluation_ontology,
-    model_config,
+    rand_gen, client, chat_evaluation_ontology, model_config, teardown_helpers
 ):
-    project = live_chat_evaluation_project_with_new_dataset
+    project_name = f"test-model-evaluation-project-{rand_gen(str)}"
+    dataset_name = f"test-model-evaluation-dataset-{rand_gen(str)}"
+    project = client.create_model_evaluation_project(
+        name=project_name, dataset_name=dataset_name, data_row_count=1
+    )
+
     project.connect_ontology(chat_evaluation_ontology)

     project.upsert_instructions("tests/integration/media/sample_pdf.pdf")

     labeling_service.request()

     yield project, project.get_labeling_service()
+
+    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)
+
+
+class TearDownHelpers:
+    @staticmethod
+    def 
teardown_project_labels_ontology_feature_schemas(project: Project):
+        """
+        Call this function to release project, labels, ontology and feature schemas in fixture teardown
+
+        NOTE: deletion failures are logged rather than raised, since this runs in a fixture teardown
+        """
+        ontology = project.ontology()
+        ontology_id = ontology.uid
+        client = project.client
+        classification_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+        ]
+        tool_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["tools"]
+        ]
+
+        feature_schema_ids = (
+            classification_feature_schema_ids + tool_feature_schema_ids
+        )
+        labels = list(project.labels())
+        for label in labels:
+            label.delete()
+
+        project.delete()
+        client.delete_unused_ontology(ontology_id)
+        for feature_schema_id in feature_schema_ids:
+            try:
+                client.delete_unused_feature_schema(feature_schema_id)
+            except LabelboxError as e:
+                print(
+                    f"Failed to delete feature schema {feature_schema_id}: {e}"
+                )
+
+    @staticmethod
+    def teardown_ontology_feature_schemas(ontology: Ontology):
+        """
+        Call this function to release an ontology and its feature schemas in fixture teardown
+
+        NOTE: deletion failures are logged rather than raised, since this runs in a fixture teardown
+        """
+        ontology_id = ontology.uid
+        client = ontology.client
+        classification_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+        ] + [
+            option["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+            for option in feature.get("options", [])
+        ]
+
+        tool_feature_schema_ids = (
+            [
+                feature["featureSchemaId"]
+                for feature in ontology.normalized["tools"]
+            ]
+            + [
+                classification["featureSchemaId"]
+                for tool in ontology.normalized["tools"]
+                for classification in tool.get("classifications", [])
+            ]
+            + [
+                option["featureSchemaId"]
+                for tool in ontology.normalized["tools"]
+                for classification in tool.get("classifications", [])
+                for option in classification.get("options", [])
+            ]
+        )
+
+        feature_schema_ids = (
+            classification_feature_schema_ids + tool_feature_schema_ids
+        )
+
+        client.delete_unused_ontology(ontology_id)
+        for feature_schema_id in feature_schema_ids:
+            try:
+                client.delete_unused_feature_schema(feature_schema_id)
+            except LabelboxError as e:
+                print(
+                    f"Failed to delete feature schema {feature_schema_id}: {e}"
+                )
+
+
+class ModuleTearDownHelpers(TearDownHelpers): ...
+
+
+@pytest.fixture
+def teardown_helpers():
+    return TearDownHelpers()
+
+
+@pytest.fixture(scope="module")
+def module_teardown_helpers():
+    return ModuleTearDownHelpers()
diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py
index 39cede0bb..6543f54bf 100644
--- a/libs/labelbox/tests/data/annotation_import/conftest.py
+++ b/libs/labelbox/tests/data/annotation_import/conftest.py
@@ -1,4 +1,3 @@
-import itertools
 import uuid

 from labelbox.schema.model_run import ModelRun
@@ -14,7 +13,6 @@
 from typing import Tuple, Type
 from labelbox.schema.annotation_import import LabelImport, AnnotationImportState
 from pytest import FixtureRequest

 """
The main fixtures of this library are configured_project and configured_project_by_global_key. Both fixtures generate data rows with a parametrized media type. They create a number of data rows equal to the DATA_ROW_COUNT variable below.
The data rows are generated with a factory fixture that returns a function that allows you to pass a global key. The ontologies are generated normalized and based on the MediaType given (i.e. only features supported by MediaType are created). This ontology is later used to obtain the correct annotations with the prediction_id_mapping and corresponding inferences. Each data row will have all possible annotations attached supported for the MediaType. @@ -719,7 +717,6 @@ def _create_project( ) project.connect_ontology(ontology) - data_row_data = [] for _ in range(DATA_ROW_COUNT): @@ -752,6 +749,7 @@ def configured_project( normalized_ontology_by_media_type, export_v2_test_helpers, llm_prompt_response_creation_dataset_with_data_row, + teardown_helpers, ): """Configure project for test. Request.param will contain the media type if not present will use Image MediaType. The project will have 10 data rows.""" @@ -789,13 +787,11 @@ def configured_project( yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) if dataset: dataset.delete() - client.delete_unused_ontology(ontology.uid) - @pytest.fixture() def configured_project_by_global_key( @@ -805,6 +801,7 @@ def configured_project_by_global_key( request: FixtureRequest, normalized_ontology_by_media_type, export_v2_test_helpers, + teardown_helpers, ): """Does the same thing as configured project but with global keys focus.""" @@ -841,13 +838,11 @@ def configured_project_by_global_key( yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) if dataset: dataset.delete() - client.delete_unused_ontology(ontology.uid) - @pytest.fixture(scope="module") def module_project( @@ -856,6 +851,7 @@ def module_project( data_row_json_by_media_type, request: FixtureRequest, normalized_ontology_by_media_type, + module_teardown_helpers, ): """Generates a image project that scopes to the test module(file). 
Used to reduce api calls.""" @@ -889,13 +885,13 @@ def module_project( yield project - project.delete() + module_teardown_helpers.teardown_project_labels_ontology_feature_schemas( + project + ) if dataset: dataset.delete() - client.delete_unused_ontology(ontology.uid) - @pytest.fixture def prediction_id_mapping(request, normalized_ontology_by_media_type): diff --git a/libs/labelbox/tests/data/annotation_import/test_model_run.py b/libs/labelbox/tests/data/annotation_import/test_model_run.py index 9eca28429..1174115c5 100644 --- a/libs/labelbox/tests/data/annotation_import/test_model_run.py +++ b/libs/labelbox/tests/data/annotation_import/test_model_run.py @@ -7,13 +7,23 @@ from labelbox import DataSplit, ModelRun -@pytest.mark.order(1) -def test_model_run(client, configured_project_with_label, data_row, rand_gen): +@pytest.fixture +def current_model(client, configured_project_with_label, rand_gen): project, _, _, label = configured_project_with_label - label_id = label.uid ontology = project.ontology() - data = {"name": rand_gen(str), "ontology_id": ontology.uid} - model = client.create_model(data["name"], data["ontology_id"]) + + model = client.create_model(rand_gen(str), ontology.uid) + yield model + + model.delete() + + +def test_model_run( + client, configured_project_with_label, current_model, data_row, rand_gen +): + _, _, _, label = configured_project_with_label + label_id = label.uid + model = current_model name = rand_gen(str) config = {"batch_size": 100, "reruns": None} diff --git a/libs/labelbox/tests/data/export/conftest.py b/libs/labelbox/tests/data/export/conftest.py index 0836c2b9e..0a62f39c8 100644 --- a/libs/labelbox/tests/data/export/conftest.py +++ b/libs/labelbox/tests/data/export/conftest.py @@ -2,7 +2,6 @@ import time import pytest from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.media_type import MediaType from labelbox.schema.labeling_frontend import LabelingFrontend from labelbox.schema.annotation_import import LabelImport, AnnotationImportState @@ -242,7 +241,7 @@ def polygon_inference(prediction_id_mapping): @pytest.fixture def configured_project_with_ontology( - client, initial_dataset, ontology, rand_gen, image_url + client, initial_dataset, ontology, rand_gen, image_url, teardown_helpers ): dataset = initial_dataset project = client.create_project( @@ -264,11 +263,13 @@ def configured_project_with_ontology( ) project.data_row_ids = data_row_ids yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture -def configured_project_without_data_rows(client, ontology, rand_gen): +def configured_project_without_data_rows( + client, ontology, rand_gen, teardown_helpers +): project = client.create_project( name=rand_gen(str), description=rand_gen(str), @@ -279,7 +280,7 @@ def configured_project_without_data_rows(client, ontology, rand_gen): )[0] project.setup(editor, ontology) yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture diff --git a/libs/labelbox/tests/data/test_data_row_metadata.py b/libs/labelbox/tests/data/test_data_row_metadata.py index 9a3690776..891cab9be 100644 --- a/libs/labelbox/tests/data/test_data_row_metadata.py +++ b/libs/labelbox/tests/data/test_data_row_metadata.py @@ -92,21 +92,6 @@ def make_named_metadata(dr_id) -> DataRowMetadata: return metadata -@pytest.mark.skip(reason="broken export v1 api, to be retired soon") -def test_export_empty_metadata( - client, 
configured_project_with_label, wait_for_data_row_processing -): - project, _, data_row, _ = configured_project_with_label - data_row = wait_for_data_row_processing(client, data_row) - - export_task = project.export(params={"metadata_fields": True}) - export_task.wait_till_done() - stream = export_task.get_buffered_stream() - data_row = [data_row.json for data_row in stream][0] - - assert data_row["metadata_fields"] == [] - - def test_bulk_export_datarow_metadata(data_row, mdo: DataRowMetadataOntology): metadata = make_metadata(data_row.uid) mdo.bulk_upsert([metadata]) diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index d37287fe8..c917a6164 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -113,7 +113,7 @@ def configured_project( @pytest.fixture def configured_project_with_complex_ontology( - client, initial_dataset, rand_gen, image_url + client, initial_dataset, rand_gen, image_url, teardown_helpers ): project = client.create_project( name=rand_gen(str), @@ -178,7 +178,7 @@ def configured_project_with_complex_ontology( project.setup(editor, ontology.asdict()) yield [project, data_row] - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture diff --git a/libs/labelbox/tests/integration/test_feature_schema.py b/libs/labelbox/tests/integration/test_feature_schema.py index 1dc940f08..46ec8c067 100644 --- a/libs/labelbox/tests/integration/test_feature_schema.py +++ b/libs/labelbox/tests/integration/test_feature_schema.py @@ -58,9 +58,8 @@ def test_throws_an_error_if_feature_schema_to_delete_doesnt_exist(client): client.delete_unused_feature_schema("doesntexist") -def test_updates_a_feature_schema_title(client): - tool = client.upsert_feature_schema(point.asdict()) - feature_schema_id = tool.normalized["featureSchemaId"] +def test_updates_a_feature_schema_title(client, feature_schema): + feature_schema_id = feature_schema.normalized["featureSchemaId"] new_title = "new title" updated_feature_schema = client.update_feature_schema_title( feature_schema_id, new_title @@ -68,20 +67,16 @@ def test_updates_a_feature_schema_title(client): assert updated_feature_schema.normalized["name"] == new_title - client.delete_unused_feature_schema(feature_schema_id) - def test_throws_an_error_when_updating_a_feature_schema_with_empty_title( - client, + client, feature_schema ): - tool = client.upsert_feature_schema(point.asdict()) + tool = feature_schema feature_schema_id = tool.normalized["featureSchemaId"] with pytest.raises(Exception): client.update_feature_schema_title(feature_schema_id, "") - client.delete_unused_feature_schema(feature_schema_id) - def test_throws_an_error_when_updating_not_existing_feature_schema(client): with pytest.raises(Exception): @@ -107,8 +102,8 @@ def test_updates_a_feature_schema(client, feature_schema): assert updated_feature_schema.normalized["name"] == "new name" -def test_does_not_include_used_feature_schema(client): - tool = client.upsert_feature_schema(point.asdict()) +def test_does_not_include_used_feature_schema(client, feature_schema): + tool = feature_schema feature_schema_id = tool.normalized["featureSchemaId"] ontology = client.create_ontology_from_feature_schemas( name="ontology name", @@ -120,4 +115,3 @@ def test_does_not_include_used_feature_schema(client): assert feature_schema_id not in unused_feature_schemas client.delete_unused_ontology(ontology.uid) - 
client.delete_unused_feature_schema(feature_schema_id) diff --git a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py index 8ecdef2f1..061efbadf 100644 --- a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py +++ b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py @@ -5,23 +5,23 @@ def test_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count is None @@ -29,23 +29,23 @@ def test_no_tasks_remaining_count(): def test_tasks_remaining_count_exists(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 1, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 1, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 1 @@ -53,23 +53,23 @@ def test_tasks_remaining_count_exists(): def test_tasks_total_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 1, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 1, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + 
"boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 1, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 1, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 0 From 51ecfeab2efa15402d949b5799e21f77ea26ee95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20J=C3=B3=C5=BAwiak?= Date: Mon, 9 Sep 2024 15:24:35 +0200 Subject: [PATCH 02/35] [PTDT-2553] Added integration tests for MMC MAL/GT imports --- .../tests/data/annotation_import/conftest.py | 495 +++++++++++++++++- .../test_generic_data_types.py | 6 + 2 files changed, 500 insertions(+), 1 deletion(-) diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py index 6543f54bf..2342a759a 100644 --- a/libs/labelbox/tests/data/annotation_import/conftest.py +++ b/libs/labelbox/tests/data/annotation_import/conftest.py @@ -1,4 +1,5 @@ import uuid +from typing import Union from labelbox.schema.model_run import ModelRun from labelbox.schema.ontology import Ontology @@ -152,6 +153,22 @@ def llm_human_preference_data_row(global_key): return llm_human_preference_data_row +@pytest.fixture(scope="module") +def mmc_data_row_url(): + return "https://storage.googleapis.com/labelbox-datasets/conversational_model_evaluation_sample/offline-model-chat-evaluation.json" + + +@pytest.fixture(scope="module", autouse=True) +def offline_model_evaluation_data_row_factory(mmc_data_row_url: str): + def offline_model_evaluation_data_row(global_key: str): + return { + "row_data": mmc_data_row_url, + "global_key": global_key, + } + + return offline_model_evaluation_data_row + + @pytest.fixture(scope="module", autouse=True) def data_row_json_by_media_type( audio_data_row_factory, @@ -163,6 +180,7 @@ def data_row_json_by_media_type( document_data_row_factory, text_data_row_factory, video_data_row_factory, + offline_model_evaluation_data_row_factory, ): return { MediaType.Audio: audio_data_row_factory, @@ -174,6 +192,7 @@ def data_row_json_by_media_type( MediaType.Document: document_data_row_factory, MediaType.Text: text_data_row_factory, MediaType.Video: video_data_row_factory, + OntologyKind.ModelEvaluation: offline_model_evaluation_data_row_factory, } @@ -345,6 +364,26 @@ def normalized_ontology_by_media_type(): ], } + radio_index = { + "required": False, + "instructions": "radio_index", + "name": "radio_index", + "type": "radio", + "scope": "index", + "options": [ + { + "label": "first_radio_answer", + "value": "first_radio_answer", + "options": [], + }, + { + "label": "second_radio_answer", + "value": "second_radio_answer", + "options": [], + }, + ], + } + prompt_text = { "instructions": "prompt-text", "name": "prompt-text", @@ -403,6 +442,27 @@ def normalized_ontology_by_media_type(): "type": "response-text", } + message_single_selection_task = { + "required": False, + "name": "message-single-selection", + "tool": "message-single-selection", + "classifications": [], + } + + message_multi_selection_task = { + "required": False, + "name": "message-multi-selection", + "tool": "message-multi-selection", + "classifications": [], + } + + message_ranking_task = { + "required": False, + "name": "message-ranking", + "tool": "message-ranking", + "classifications": [], + } + return { MediaType.Image: { "tools": [ @@ -516,6 +576,21 @@ def 
normalized_ontology_by_media_type(): response_checklist, ], }, + OntologyKind.ModelEvaluation: { + "tools": [ + message_single_selection_task, + message_multi_selection_task, + message_ranking_task, + ], + "classifications": [ + radio, + checklist, + free_form_text, + radio_index, + checklist_index, + free_form_text_index, + ], + }, "all": { "tools": [ bbox_tool, @@ -695,6 +770,45 @@ def _create_prompt_response_project( return prompt_response_project, ontology +def _create_offline_mmc_project( + client: Client, rand_gen, data_row_json, normalized_ontology +) -> Tuple[Project, Ontology, Dataset]: + dataset = client.create_dataset(name=rand_gen(str)) + + project = client.create_offline_model_evaluation_project( + name=f"offline-mmc-{rand_gen(str)}", + ) + + ontology = client.create_ontology( + name=f"offline-mmc-{rand_gen(str)}", + normalized=normalized_ontology, + media_type=MediaType.Conversational, + ontology_kind=OntologyKind.ModelEvaluation, + ) + + project.connect_ontology(ontology) + + data_row_data = [ + data_row_json(rand_gen(str)) for _ in range(DATA_ROW_COUNT) + ] + + task = dataset.create_data_rows(data_row_data) + task.wait_till_done() + global_keys = [row["global_key"] for row in task.result] + data_row_ids = [row["id"] for row in task.result] + + project.create_batch( + rand_gen(str), + data_row_ids, # sample of data row objects + 5, # priority between 1(Highest) - 5(lowest) + ) + project.data_row_ids = data_row_ids + project.data_row_data = data_row_data + project.global_keys = global_keys + + return project, ontology, dataset + + def _create_project( client: Client, rand_gen, @@ -753,7 +867,10 @@ def configured_project( ): """Configure project for test. Request.param will contain the media type if not present will use Image MediaType. 
The project will have 10 data rows.""" - media_type = getattr(request, "param", MediaType.Image) + media_type: Union[MediaType, OntologyKind] = getattr( + request, "param", MediaType.Image + ) + dataset = None if ( @@ -776,6 +893,13 @@ def configured_project( media_type, normalized_ontology_by_media_type, ) + elif media_type == OntologyKind.ModelEvaluation: + project, ontology, dataset = _create_offline_mmc_project( + client, + rand_gen, + data_row_json_by_media_type[media_type], + normalized_ontology_by_media_type[media_type], + ) else: project, ontology, dataset = _create_project( client, @@ -827,6 +951,13 @@ def configured_project_by_global_key( media_type, normalized_ontology_by_media_type, ) + elif media_type == OntologyKind.ModelEvaluation: + project, ontology, dataset = _create_offline_mmc_project( + client, + rand_gen, + data_row_json_by_media_type[media_type], + normalized_ontology_by_media_type[media_type], + ) else: project, ontology, dataset = _create_project( client, @@ -988,6 +1119,31 @@ def prediction_id_mapping(request, normalized_ontology_by_media_type): return base_annotations +@pytest.fixture +def mmc_example_data_row_message_ids(mmc_data_row_url: str): + data_row_content = requests.get(mmc_data_row_url).json() + + human_id = next( + actor_id + for actor_id, actor_metadata in data_row_content["actors"].items() + if actor_metadata["role"] == "human" + ) + + return { + message_id: [ + { + "id": child_msg_id, + "model_config_name": data_row_content["actors"][ + data_row_content["messages"][child_msg_id]["actorId"] + ]["metadata"]["modelConfigName"], + } + for child_msg_id in message_metadata["childMessageIds"] + ] + for message_id, message_metadata in data_row_content["messages"].items() + if message_metadata["actorId"] == human_id + } + + # Each inference represents a feature type that adds to the base annotation created with prediction_id_mapping @pytest.fixture def polygon_inference(prediction_id_mapping): @@ -1303,6 +1459,31 @@ def checklist_inference_index(prediction_id_mapping): return checklists +@pytest.fixture +def checklist_inference_index_mmc( + prediction_id_mapping, mmc_example_data_row_message_ids +): + checklists = [] + for feature in prediction_id_mapping: + if "checklist_index" not in feature: + return None + checklist = feature["checklist_index"].copy() + checklist.update( + { + "answers": [ + {"name": "first_checklist_answer"}, + {"name": "second_checklist_answer"}, + ], + "messageId": next( + iter(mmc_example_data_row_message_ids.keys()) + ), + } + ) + del checklist["tool"] + checklists.append(checklist) + return checklists + + @pytest.fixture def prompt_text_inference(prediction_id_mapping): prompt_texts = [] @@ -1333,6 +1514,45 @@ def radio_response_inference(prediction_id_mapping): return response_radios +@pytest.fixture +def radio_inference(prediction_id_mapping): + radios = [] + for feature in prediction_id_mapping: + if "radio" not in feature: + continue + radio = feature["radio"].copy() + radio.update( + { + "answer": {"name": "first_radio_answer"}, + } + ) + del radio["tool"] + radios.append(radio) + return radios + + +@pytest.fixture +def radio_inference_index_mmc( + prediction_id_mapping, mmc_example_data_row_message_ids +): + radios = [] + for feature in prediction_id_mapping: + if "radio_index" not in feature: + continue + radio = feature["radio_index"].copy() + radio.update( + { + "answer": {"name": "first_radio_answer"}, + "messageId": next( + iter(mmc_example_data_row_message_ids.keys()) + ), + } + ) + del radio["tool"] + 
radios.append(radio) + return radios + + @pytest.fixture def checklist_response_inference(prediction_id_mapping): response_checklists = [] @@ -1402,6 +1622,28 @@ def text_inference_index(prediction_id_mapping): return texts +@pytest.fixture +def text_inference_index_mmc( + prediction_id_mapping, mmc_example_data_row_message_ids +): + texts = [] + for feature in prediction_id_mapping: + if "text_index" not in feature: + continue + text = feature["text_index"].copy() + text.update( + { + "answer": "free form text...", + "messageId": next( + iter(mmc_example_data_row_message_ids.keys()) + ), + } + ) + del text["tool"] + texts.append(text) + return texts + + @pytest.fixture def video_checklist_inference(prediction_id_mapping): checklists = [] @@ -1437,6 +1679,118 @@ def video_checklist_inference(prediction_id_mapping): return checklists +@pytest.fixture +def message_single_selection_inference( + prediction_id_mapping, mmc_example_data_row_message_ids +): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + res = [] + for feature in prediction_id_mapping: + if "message-single-selection" not in feature: + continue + selection = feature["message-single-selection"].copy() + selection.update( + { + "messageEvaluationTask": { + "format": "message-single-selection", + "data": { + "messageId": some_child_ids[0]["id"], + "parentMessageId": some_parent_id, + "modelConfigName": some_child_ids[0][ + "model_config_name" + ], + }, + } + } + ) + del selection["tool"] + res.append(selection) + + return res + + +@pytest.fixture +def message_multi_selection_inference( + prediction_id_mapping, mmc_example_data_row_message_ids +): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + res = [] + for feature in prediction_id_mapping: + if "message-multi-selection" not in feature: + continue + selection = feature["message-multi-selection"].copy() + selection.update( + { + "messageEvaluationTask": { + "format": "message-multi-selection", + "data": { + "parentMessageId": some_parent_id, + "selectedMessages": [ + { + "messageId": child_id["id"], + "modelConfigName": child_id[ + "model_config_name" + ], + } + for child_id in some_child_ids + ], + }, + } + } + ) + del selection["tool"] + res.append(selection) + + return res + + +@pytest.fixture +def message_ranking_inference( + prediction_id_mapping, mmc_example_data_row_message_ids +): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + res = [] + for feature in prediction_id_mapping: + if "message-ranking" not in feature: + continue + selection = feature["message-ranking"].copy() + selection.update( + { + "messageEvaluationTask": { + "format": "message-ranking", + "data": { + "parentMessageId": some_parent_id, + "rankedMessages": [ + { + "messageId": child_id["id"], + "modelConfigName": child_id[ + "model_config_name" + ], + "order": idx, + } + for idx, child_id in enumerate( + some_child_ids, start=1 + ) + ], + }, + } + } + ) + del selection["tool"] + res.append(selection) + + return res + + @pytest.fixture def annotations_by_media_type( polygon_inference, @@ -1456,6 +1810,13 @@ def annotations_by_media_type( checklist_response_inference, radio_response_inference, text_response_inference, + message_single_selection_inference, + message_multi_selection_inference, + message_ranking_inference, + checklist_inference_index_mmc, + radio_inference, + radio_inference_index_mmc, + text_inference_index_mmc, ): return { MediaType.Audio: 
[checklist_inference, text_inference], @@ -1493,6 +1854,17 @@ def annotations_by_media_type( checklist_response_inference, radio_response_inference, ], + OntologyKind.ModelEvaluation: [ + message_single_selection_inference, + message_multi_selection_inference, + message_ranking_inference, + radio_inference, + checklist_inference, + text_inference, + radio_inference_index_mmc, + checklist_inference_index_mmc, + text_inference_index_mmc, + ], } @@ -2162,6 +2534,125 @@ def expected_export_v2_llm_response_creation(): return expected_annotations +@pytest.fixture +def expected_exports_v2_mmc(mmc_example_data_row_message_ids): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + return { + "objects": [ + { + "name": "message-single-selection", + "annotation_kind": "MessageSingleSelection", + "classifications": [], + "selected_message": { + "message_id": some_child_ids[0]["id"], + "model_config_name": some_child_ids[0]["model_config_name"], + "parent_message_id": some_parent_id, + }, + }, + { + "name": "message-multi-selection", + "annotation_kind": "MessageMultiSelection", + "classifications": [], + "selected_messages": { + "messages": [ + { + "message_id": child_id["id"], + "model_config_name": child_id["model_config_name"], + } + for child_id in some_child_ids + ], + "parent_message_id": some_parent_id, + }, + }, + { + "name": "message-ranking", + "annotation_kind": "MessageRanking", + "classifications": [], + "ranked_messages": { + "ranked_messages": [ + { + "message_id": child_id["id"], + "model_config_name": child_id["model_config_name"], + "order": idx, + } + for idx, child_id in enumerate(some_child_ids, start=1) + ], + "parent_message_id": some_parent_id, + }, + }, + ], + "classifications": [ + { + "name": "radio", + "value": "radio", + "radio_answer": { + "name": "first_radio_answer", + "value": "first_radio_answer", + "classifications": [], + }, + }, + { + "name": "checklist", + "value": "checklist", + "checklist_answers": [ + { + "name": "first_checklist_answer", + "value": "first_checklist_answer", + "classifications": [], + }, + { + "name": "second_checklist_answer", + "value": "second_checklist_answer", + "classifications": [], + }, + ], + }, + { + "name": "text", + "value": "text", + "text_answer": {"content": "free form text..."}, + }, + { + "name": "radio_index", + "value": "radio_index", + "message_id": some_parent_id, + "conversational_radio_answer": { + "name": "first_radio_answer", + "value": "first_radio_answer", + "classifications": [], + }, + }, + { + "name": "checklist_index", + "value": "checklist_index", + "message_id": some_parent_id, + "conversational_checklist_answers": [ + { + "name": "first_checklist_answer", + "value": "first_checklist_answer", + "classifications": [], + }, + { + "name": "second_checklist_answer", + "value": "second_checklist_answer", + "classifications": [], + }, + ], + }, + { + "name": "text_index", + "value": "text_index", + "message_id": some_parent_id, + "conversational_text_answer": {"content": "free form text..."}, + }, + ], + "relationships": [], + } + + @pytest.fixture def exports_v2_by_media_type( expected_export_v2_image, @@ -2175,6 +2666,7 @@ def exports_v2_by_media_type( expected_export_v2_llm_prompt_response_creation, expected_export_v2_llm_prompt_creation, expected_export_v2_llm_response_creation, + expected_exports_v2_mmc, ): return { MediaType.Image: expected_export_v2_image, @@ -2188,6 +2680,7 @@ def exports_v2_by_media_type( MediaType.LLMPromptResponseCreation: 
expected_export_v2_llm_prompt_response_creation,
        MediaType.LLMPromptCreation: expected_export_v2_llm_prompt_creation,
        OntologyKind.ResponseCreation: expected_export_v2_llm_response_creation,
+        OntologyKind.ModelEvaluation: expected_exports_v2_mmc,
    }
diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
index f8f0c449a..9de67bd4e 100644
--- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
+++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
def validate_iso_format(date_string: str):
        (MediaType.LLMPromptResponseCreation, GenericDataRowData),
        (MediaType.LLMPromptCreation, GenericDataRowData),
        (OntologyKind.ResponseCreation, GenericDataRowData),
+        (OntologyKind.ModelEvaluation, GenericDataRowData),
    ],
)
def test_generic_data_row_type_by_data_row_id(
        # (MediaType.LLMPromptResponseCreation, GenericDataRowData),
        # (MediaType.LLMPromptCreation, GenericDataRowData),
        (OntologyKind.ResponseCreation, GenericDataRowData),
+        (OntologyKind.ModelEvaluation, GenericDataRowData),
    ],
)
def test_generic_data_row_type_by_global_key(
        ),
        (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation),
        (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
    ],
    indirect=["configured_project"],
)
def test_import_media_types(
        (MediaType.Document, MediaType.Document),
        (MediaType.Dicom, MediaType.Dicom),
        (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
    ],
    indirect=["configured_project_by_global_key"],
)
def test_import_media_types_by_global_key(
        ),
        (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation),
        (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
    ],
    indirect=["configured_project"],
)
def test_import_mal_annotations(
        (MediaType.Document, MediaType.Document),
        (MediaType.Dicom, MediaType.Dicom),
        (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
    ],
    indirect=["configured_project_by_global_key"],
)
From 24e07661a77f60190a31e0ee6077e04b65a373fe Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Tue, 17 Sep 2024 10:57:13 -0700
Subject: [PATCH 03/35] SDK release v.5.0.0 prep (#1823)

---
 docs/conf.py | 2 +-
 libs/labelbox/CHANGELOG.md | 10 ++++++++++
 libs/labelbox/pyproject.toml | 2 +-
 libs/labelbox/src/labelbox/__init__.py | 2 +-
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index b9870b87a..a67a44a24 100644
--- a/docs/conf.py
+++ b/docs/conf.py
 project = 'Python SDK reference'
 copyright = '2024, Labelbox'
 author = 'Labelbox'
-release = '4.0.0'
+release = '5.0.0'

 # -- General configuration ---------------------------------------------------
diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md
index ae97086c6..b2d41b56d 100644
--- a/libs/labelbox/CHANGELOG.md
+++ b/libs/labelbox/CHANGELOG.md
 # Changelog
+# Version 5.0.0 (2024-09-16)
+## Updated
+* Set tasks_remaining_count to None in LabelingServiceDashboard if labeling has not started 
([#1817](https://github.com/Labelbox/labelbox-python/pull/1817)) +* Improve error messaging when creating LLM project with invalid dataset id parameter([#1799](https://github.com/Labelbox/labelbox-python/pull/1799)) +## Removed +* BREAKING CHANGE SDK methods for exports v1([#1800](https://github.com/Labelbox/labelbox-python/pull/1800)) +* BREAKING CHANGE Unused labelbox_v1 serialization package([#1803](https://github.com/Labelbox/labelbox-python/pull/1803)) +## Fixed +* Cuid dependencies that cause a crash if numpy is not installed ([#1807](https://github.com/Labelbox/labelbox-python/pull/1807)) + # Version 4.0.0 (2024-09-10) ## Added * BREAKING CHANGE for pydantic V1 users: Converted SDK to use pydantic V2([#1738](https://github.com/Labelbox/labelbox-python/pull/1738)) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index 58ce3410a..f4c24af59 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "labelbox" -version = "4.0.0" +version = "5.0.0" description = "Labelbox Python API" authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }] dependencies = [ diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 633e8f4c2..5b5ac1f67 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -1,6 +1,6 @@ name = "labelbox" -__version__ = "4.0.0" +__version__ = "5.0.0" from labelbox.client import Client from labelbox.schema.project import Project From 2faf9a10c068621e3a58a690b1dbbddbce0c0f25 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 18 Sep 2024 09:01:28 -0700 Subject: [PATCH 04/35] Vb/merge 5.0.0 (#1826) Co-authored-by: Gabe <33893811+Gabefire@users.noreply.github.com> --- libs/labelbox/src/labelbox/__init__.py | 1 - libs/labelbox/src/labelbox/orm/model.py | 1 - libs/labelbox/src/labelbox/schema/__init__.py | 21 +- .../labelbox/schema/bulk_import_request.py | 1004 ----------------- libs/labelbox/src/labelbox/schema/enums.py | 25 - libs/labelbox/src/labelbox/schema/project.py | 120 +- .../test_bulk_import_request.py | 258 ----- .../test_ndjson_validation.py | 53 +- .../classification_import_global_key.json | 54 - ...conversation_entity_import_global_key.json | 25 - .../data/assets/ndjson/image_import.json | 779 +------------ .../ndjson/image_import_global_key.json | 823 -------------- .../assets/ndjson/image_import_name_only.json | 810 +------------ .../ndjson/metric_import_global_key.json | 10 - .../assets/ndjson/pdf_import_global_key.json | 155 --- .../ndjson/polyline_import_global_key.json | 36 - .../ndjson/text_entity_import_global_key.json | 26 - .../ndjson/video_import_global_key.json | 166 --- .../serialization/ndjson/test_checklist.py | 26 - .../ndjson/test_classification.py | 108 +- .../serialization/ndjson/test_conversation.py | 71 +- .../serialization/ndjson/test_data_gen.py | 80 +- .../data/serialization/ndjson/test_dicom.py | 26 - .../serialization/ndjson/test_document.py | 294 ++++- .../ndjson/test_export_video_objects.py | 32 +- .../serialization/ndjson/test_free_text.py | 26 - .../serialization/ndjson/test_global_key.py | 125 +- .../data/serialization/ndjson/test_image.py | 203 +++- .../data/serialization/ndjson/test_metric.py | 170 ++- .../data/serialization/ndjson/test_mmc.py | 125 +- .../ndjson/test_ndlabel_subclass_matching.py | 19 - .../data/serialization/ndjson/test_nested.py | 236 +++- .../serialization/ndjson/test_polyline.py | 82 +- .../data/serialization/ndjson/test_radio.py | 16 - 
.../serialization/ndjson/test_rectangle.py | 43 +- .../serialization/ndjson/test_relationship.py | 151 ++- .../data/serialization/ndjson/test_text.py | 10 - .../serialization/ndjson/test_text_entity.py | 69 +- .../data/serialization/ndjson/test_video.py | 868 +++++++++++++- 39 files changed, 2380 insertions(+), 4767 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py delete mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py delete mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json delete mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 5b5ac1f67..f9b82b422 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -6,7 +6,6 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig -from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( MALPredictionImport, MEAPredictionImport, diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py index 84dcac774..1f3ee1d86 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -386,7 +386,6 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] - BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index 03327e0d1..e57c04a29 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,29 +1,28 @@ -import labelbox.schema.asset_attachment -import labelbox.schema.bulk_import_request import labelbox.schema.annotation_import +import labelbox.schema.asset_attachment +import labelbox.schema.batch import labelbox.schema.benchmark +import labelbox.schema.catalog import labelbox.schema.data_row +import labelbox.schema.data_row_metadata import labelbox.schema.dataset +import labelbox.schema.iam_integration +import labelbox.schema.identifiable +import labelbox.schema.identifiables import labelbox.schema.invite import labelbox.schema.label import labelbox.schema.labeling_frontend import labelbox.schema.labeling_service +import labelbox.schema.media_type import labelbox.schema.model import labelbox.schema.model_run import labelbox.schema.ontology +import labelbox.schema.ontology_kind import labelbox.schema.organization import labelbox.schema.project 
+import labelbox.schema.project_overview import labelbox.schema.review import labelbox.schema.role import labelbox.schema.task import labelbox.schema.user import labelbox.schema.webhook -import labelbox.schema.data_row_metadata -import labelbox.schema.batch -import labelbox.schema.iam_integration -import labelbox.schema.media_type -import labelbox.schema.identifiables -import labelbox.schema.identifiable -import labelbox.schema.catalog -import labelbox.schema.ontology_kind -import labelbox.schema.project_overview diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py deleted file mode 100644 index 8e11f3261..000000000 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ /dev/null @@ -1,1004 +0,0 @@ -import json -import time -from uuid import UUID, uuid4 -import functools - -import logging -from pathlib import Path -from google.api_core import retry -from labelbox import parser -import requests -from pydantic import ( - ValidationError, - BaseModel, - Field, - field_validator, - model_validator, - ConfigDict, - StringConstraints, -) -from typing_extensions import Literal, Annotated -from typing import ( - Any, - List, - Optional, - BinaryIO, - Dict, - Iterable, - Tuple, - Union, - Type, - Set, - TYPE_CHECKING, -) - -from labelbox import exceptions as lb_exceptions -from labelbox import utils -from labelbox.orm import query -from labelbox.orm.db_object import DbObject -from labelbox.orm.model import Relationship -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.serialization import serialize_labels -from labelbox.orm.model import Field as lb_Field - -if TYPE_CHECKING: - from labelbox import Project - from labelbox.types import Label - -NDJSON_MIME_TYPE = "application/x-ndjson" -logger = logging.getLogger(__name__) - -# TODO: Deprecate this library in place of labelimport and malprediction import library. - - -def _determinants(parent_cls: Any) -> List[str]: - return [ - k - for k, v in parent_cls.model_fields.items() - if v.json_schema_extra and "determinant" in v.json_schema_extra - ] - - -def _make_file_name(project_id: str, name: str) -> str: - return f"{project_id}__{name}.ndjson" - - -# TODO(gszpak): move it to client.py -def _make_request_data( - project_id: str, name: str, content_length: int, file_name: str -) -> dict: - query_str = """mutation createBulkImportRequestFromFilePyApi( - $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) 
{ - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - filePayload: { - file: $file, - contentLength: $contentLength - } - }) { - %s - } - } - """ % query.results_query_part(BulkImportRequest) - variables = { - "projectId": project_id, - "name": name, - "file": None, - "contentLength": content_length, - } - operations = json.dumps({"variables": variables, "query": query_str}) - - return { - "operations": operations, - "map": (None, json.dumps({file_name: ["variables.file"]})), - } - - -def _send_create_file_command( - client, - request_data: dict, - file_name: str, - file_data: Tuple[str, Union[bytes, BinaryIO], str], -) -> dict: - response = client.execute(data=request_data, files={file_name: file_data}) - - if not response.get("createBulkImportRequest", None): - raise lb_exceptions.LabelboxError( - "Failed to create BulkImportRequest, message: %s" - % response.get("errors", None) - or response.get("error", None) - ) - - return response - - -class BulkImportRequest(DbObject): - """Represents the import job when importing annotations. - - Attributes: - name (str) - state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) - input_file_url (str): URL to your web-hosted NDJSON file - error_file_url (str): NDJSON that contains error messages for failed annotations - status_file_url (str): NDJSON that contains status for each annotation - created_at (datetime): UTC timestamp for date BulkImportRequest was created - - project (Relationship): `ToOne` relationship to Project - created_by (Relationship): `ToOne` relationship to User - """ - - name = lb_Field.String("name") - state = lb_Field.Enum(BulkImportRequestState, "state") - input_file_url = lb_Field.String("input_file_url") - error_file_url = lb_Field.String("error_file_url") - status_file_url = lb_Field.String("status_file_url") - created_at = lb_Field.DateTime("created_at") - - project = Relationship.ToOne("Project") - created_by = Relationship.ToOne("User", False, "created_by") - - @property - def inputs(self) -> List[Dict[str, Any]]: - """ - Inputs for each individual annotation uploaded. - This should match the ndjson annotations that you have uploaded. - - Returns: - Uploaded ndjson. - - * This information will expire after 24 hours. - """ - return self._fetch_remote_ndjson(self.input_file_url) - - @property - def errors(self) -> List[Dict[str, Any]]: - """ - Errors for each individual annotation uploaded. This is a subset of statuses - - Returns: - List of dicts containing error messages. Empty list means there were no errors - See `BulkImportRequest.statuses` for more details. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.error_file_url) - - @property - def statuses(self) -> List[Dict[str, Any]]: - """ - Status for each individual annotation uploaded. - - Returns: - A status for each annotation if the upload is done running. - See below table for more details - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - Field - - Description - * - uuid - - Specifies the annotation for the status row. - * - dataRow - - JSON object containing the Labelbox data row ID for the annotation. - * - status - - Indicates SUCCESS or FAILURE. - * - errors - - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. - - * This information will expire after 24 hours. 
- """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.status_file_url) - - @functools.lru_cache() - def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: - """ - Fetches the remote ndjson file and caches the results. - - Args: - url (str): Can be any url pointing to an ndjson file. - Returns: - ndjson as a list of dicts. - """ - response = requests.get(url) - response.raise_for_status() - return parser.loads(response.text) - - def refresh(self) -> None: - """Synchronizes values of all fields with the database.""" - query_str, params = query.get_single(BulkImportRequest, self.uid) - res = self.client.execute(query_str, params) - res = res[utils.camel_case(BulkImportRequest.type_name())] - self._set_field_values(res) - - def wait_till_done(self, sleep_time_seconds: int = 5) -> None: - self.wait_until_done(sleep_time_seconds) - - def wait_until_done(self, sleep_time_seconds: int = 5) -> None: - """Blocks import job until certain conditions are met. - - Blocks until the BulkImportRequest.state changes either to - `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, - periodically refreshing object's state. - - Args: - sleep_time_seconds (str): a time to block between subsequent API calls - """ - while self.state == BulkImportRequestState.RUNNING: - logger.info(f"Sleeping for {sleep_time_seconds} seconds...") - time.sleep(sleep_time_seconds) - self.__exponential_backoff_refresh() - - @retry.Retry( - predicate=retry.if_exception_type( - lb_exceptions.ApiLimitError, - lb_exceptions.TimeoutError, - lb_exceptions.NetworkError, - ) - ) - def __exponential_backoff_refresh(self) -> None: - self.refresh() - - @classmethod - def from_name( - cls, client, project_id: str, name: str - ) -> "BulkImportRequest": - """Fetches existing BulkImportRequest. - - Args: - client (Client): a Labelbox client - project_id (str): BulkImportRequest's project id - name (str): name of BulkImportRequest - Returns: - BulkImportRequest object - - """ - query_str = """query getBulkImportRequestPyApi( - $projectId: ID!, $name: String!) { - bulkImportRequest(where: { - projectId: $projectId, - name: $name - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name} - response = client.execute(query_str, params=params) - return cls(client, response["bulkImportRequest"]) - - @classmethod - def create_from_url( - cls, client, project_id: str, name: str, url: str, validate=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a publicly accessible URL - to an ndjson file with predictions. - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - url (str): publicly accessible URL pointing to ndjson file containing predictions - validate (bool): a flag indicating if there should be a validation - if `url` is valid ndjson - Returns: - BulkImportRequest object - """ - if validate: - logger.warn( - "Validation is turned on. The file will be downloaded locally and processed before uploading." - ) - res = requests.get(url) - data = parser.loads(res.text) - _validate_ndjson(data, client.get_project(project_id)) - - query_str = """mutation createBulkImportRequestPyApi( - $projectId: ID!, $name: String!, $fileUrl: String!) 
{ - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - fileUrl: $fileUrl - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name, "fileUrl": url} - bulk_import_request_response = client.execute(query_str, params=params) - return cls( - client, bulk_import_request_response["createBulkImportRequest"] - ) - - @classmethod - def create_from_objects( - cls, - client, - project_id: str, - name: str, - predictions: Union[Iterable[Dict], Iterable["Label"]], - validate=True, - ) -> "BulkImportRequest": - """ - Creates a `BulkImportRequest` from an iterable of dictionaries. - - Conforms to JSON predictions format, e.g.: - ``{ - "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", - "schemaId": "ckappz7d700gn0zbocmqkwd9i", - "dataRow": { - "id": "ck1s02fqxm8fi0757f0e6qtdc" - }, - "bbox": { - "top": 48, - "left": 58, - "height": 865, - "width": 1512 - } - }`` - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - predictions (Iterable[dict]): iterable of dictionaries representing predictions - validate (bool): a flag indicating if there should be a validation - if `predictions` is valid ndjson - Returns: - BulkImportRequest object - """ - if not isinstance(predictions, list): - raise TypeError( - f"annotations must be in a form of Iterable. Found {type(predictions)}" - ) - ndjson_predictions = serialize_labels(predictions) - - if validate: - _validate_ndjson(ndjson_predictions, client.get_project(project_id)) - - data_str = parser.dumps(ndjson_predictions) - if not data_str: - raise ValueError("annotations cannot be empty") - - data = data_str.encode("utf-8") - file_name = _make_file_name(project_id, name) - request_data = _make_request_data( - project_id, name, len(data_str), file_name - ) - file_data = (file_name, data, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, - request_data=request_data, - file_name=file_name, - file_data=file_data, - ) - - return cls(client, response_data["createBulkImportRequest"]) - - @classmethod - def create_from_local_file( - cls, client, project_id: str, name: str, file: Path, validate_file=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a local ndjson file with predictions. 
- - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - file (Path): local ndjson file with predictions - validate_file (bool): a flag indicating if there should be a validation - if `file` is a valid ndjson file - Returns: - BulkImportRequest object - - """ - file_name = _make_file_name(project_id, name) - content_length = file.stat().st_size - request_data = _make_request_data( - project_id, name, content_length, file_name - ) - - with file.open("rb") as f: - if validate_file: - reader = parser.reader(f) - # ensure that the underlying json load call is valid - # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53 - # by iterating through the file so we only store - # each line in memory rather than the entire file - try: - _validate_ndjson(reader, client.get_project(project_id)) - except ValueError: - raise ValueError(f"{file} is not a valid ndjson file") - else: - f.seek(0) - file_data = (file.name, f, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, request_data, file_name, file_data - ) - return cls(client, response_data["createBulkImportRequest"]) - - def delete(self) -> None: - """Deletes the import job and also any annotations created by this import. - - Returns: - None - """ - id_param = "bulk_request_id" - query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) { - deleteBulkImportRequest(where: {id: $%s}) { - id - name - } - }""" % (id_param, id_param) - self.client.execute(query_str, {id_param: self.uid}) - - -def _validate_ndjson( - lines: Iterable[Dict[str, Any]], project: "Project" -) -> None: - """ - Client side validation of an ndjson object. - - Does not guarentee that an upload will succeed for the following reasons: - * We are not checking the data row types which will cause the following errors to slip through - * Missing frame indices will not causes an error for videos - * Uploaded annotations for the wrong data type will pass (Eg. entity on images) - * We are not checking bounds of an asset (Eg. frame index, image height, text location) - - Args: - lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines - project (Project): id of project for which predictions will be imported - - Raises: - MALValidationError: Raise for invalid NDJson - UuidError: Duplicate UUID in upload - """ - feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas( - project.ontology() - ) - uids: Set[str] = set() - for idx, line in enumerate(lines): - try: - annotation = NDAnnotation(**line) - annotation.validate_instance( - feature_schemas_by_id, feature_schemas_by_name - ) - uuid = str(annotation.uuid) - if uuid in uids: - raise lb_exceptions.UuidError( - f"{uuid} already used in this import job, " - "must be unique for the project." - ) - uids.add(uuid) - except (ValidationError, ValueError, TypeError, KeyError) as e: - raise lb_exceptions.MALValidationError( - f"Invalid NDJson on line {idx}" - ) from e - - -# The rest of this file contains objects for MAL validation -def parse_classification(tool): - """ - Parses a classification from an ontology. 
Only radio, checklist, and text are supported for mal - - Args: - tool (dict) - - Returns: - dict - """ - if tool["type"] in ["radio", "checklist"]: - option_schema_ids = [r["featureSchemaId"] for r in tool["options"]] - option_names = [r["value"] for r in tool["options"]] - return { - "tool": tool["type"], - "featureSchemaId": tool["featureSchemaId"], - "name": tool["name"], - "options": [*option_schema_ids, *option_names], - } - elif tool["type"] == "text": - return { - "tool": tool["type"], - "name": tool["name"], - "featureSchemaId": tool["featureSchemaId"], - } - - -def get_mal_schemas(ontology): - """ - Converts a project ontology to a dict for easier lookup during ndjson validation - - Args: - ontology (Ontology) - Returns: - Dict, Dict : Useful for looking up a tool from a given feature schema id or name - """ - - valid_feature_schemas_by_schema_id = {} - valid_feature_schemas_by_name = {} - for tool in ontology.normalized["tools"]: - classifications = [ - parse_classification(classification_tool) - for classification_tool in tool["classifications"] - ] - classifications_by_schema_id = { - v["featureSchemaId"]: v for v in classifications - } - classifications_by_name = {v["name"]: v for v in classifications} - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - valid_feature_schemas_by_name[tool["name"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - for tool in ontology.normalized["classifications"]: - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = ( - parse_classification(tool) - ) - valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool) - return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name - - -class Bbox(BaseModel): - top: float - left: float - height: float - width: float - - -class Point(BaseModel): - x: float - y: float - - -class FrameLocation(BaseModel): - end: int - start: int - - -class VideoSupported(BaseModel): - # Note that frames are only allowed as top level inferences for video - frames: Optional[List[FrameLocation]] = None - - -# Base class for a special kind of union. -class SpecialUnion: - def __new__(cls, **kwargs): - return cls.build(kwargs) - - @classmethod - def __get_validators__(cls): - yield cls.build - - @classmethod - def get_union_types(cls): - if not issubclass(cls, SpecialUnion): - raise TypeError("{} must be a subclass of SpecialUnion") - - union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")] - if len(union_types) < 1: - raise TypeError( - "Class {cls} should inherit from a union of objects to build" - ) - if len(union_types) > 1: - raise TypeError( - f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}" - ) - return union_types[0].__args__[0].__args__ - - @classmethod - def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase": - """ - Checks through all objects in the union to see which matches the input data. 
- Args: - data (Union[dict, BaseModel]) : The data for constructing one of the objects in the union - raises: - KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion - ValidationError: Error while trying to construct a specific object in the union - - """ - if isinstance(data, BaseModel): - data = data.model_dump() - - top_level_fields = [] - max_match = 0 - matched = None - - for type_ in cls.get_union_types(): - determinate_fields = _determinants(type_) - top_level_fields.append(determinate_fields) - matches = sum([val in determinate_fields for val in data]) - if matches == len(determinate_fields) and matches > max_match: - max_match = matches - matched = type_ - - if matched is not None: - # These two have the exact same top level keys - if matched in [NDRadio, NDText]: - if isinstance(data["answer"], dict): - matched = NDRadio - elif isinstance(data["answer"], str): - matched = NDText - else: - raise TypeError( - f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict" - ) - return matched(**data) - else: - raise KeyError( - f"Invalid annotation. Must have one of the following keys : {top_level_fields}. Found {data}." - ) - - @classmethod - def schema(cls): - results = {"definitions": {}} - for cl in cls.get_union_types(): - schema = cl.schema() - results["definitions"].update(schema.pop("definitions")) - results[cl.__name__] = schema - return results - - -class DataRow(BaseModel): - id: str - - -class NDFeatureSchema(BaseModel): - schemaId: Optional[str] = None - name: Optional[str] = None - - @model_validator(mode="after") - def most_set_one(self): - if self.schemaId is None and self.name is None: - raise ValueError( - "Must set either schemaId or name for all feature schemas" - ) - return self - - -class NDBase(NDFeatureSchema): - ontology_type: str - uuid: UUID - dataRow: DataRow - model_config = ConfigDict(extra="forbid") - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - if self.name: - if self.name not in valid_feature_schemas_by_name: - raise ValueError( - f"Name {self.name} is not valid for the provided project's ontology." - ) - - if ( - self.ontology_type - != valid_feature_schemas_by_name[self.name]["tool"] - ): - raise ValueError( - f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}" - ) - - if self.schemaId: - if self.schemaId not in valid_feature_schemas_by_id: - raise ValueError( - f"Schema id {self.schemaId} is not valid for the provided project's ontology." 
- ) - - if ( - self.ontology_type - != valid_feature_schemas_by_id[self.schemaId]["tool"] - ): - raise ValueError( - f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}" - ) - - def validate_instance( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - self.validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - - -###### Classifications ###### - - -class NDText(NDBase): - ontology_type: Literal["text"] = "text" - answer: str = Field(json_schema_extra={"determinant": True}) - # No feature schema to check - - -class NDChecklist(VideoSupported, NDBase): - ontology_type: Literal["checklist"] = "checklist" - answers: List[NDFeatureSchema] = Field( - json_schema_extra={"determinant": True} - ) - - @field_validator("answers", mode="before") - def validate_answers(cls, value, field): - # constr not working with mypy. - if not len(value): - raise ValueError("Checklist answers should not be empty") - return value - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - # Test top level feature schema for this tool - super(NDChecklist, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - # Test the feature schemas provided to the answer field - if len( - set([answer.name or answer.schemaId for answer in self.answers]) - ) != len(self.answers): - raise ValueError( - f"Duplicated featureSchema found for checklist {self.uuid}" - ) - for answer in self.answers: - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if answer.name not in options and answer.schemaId not in options: - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {answer}" - ) - - -class NDRadio(VideoSupported, NDBase): - ontology_type: Literal["radio"] = "radio" - answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True}) - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDRadio, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if ( - self.answer.name not in options - and self.answer.schemaId not in options - ): - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {self.answer.name or self.answer.schemaId}" - ) - - -# A union with custom construction logic to improve error messages -class NDClassification( - SpecialUnion, - Type[Union[NDText, NDRadio, NDChecklist]], # type: ignore -): ... 
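To make the determinant matching concrete, a small sketch of how this union resolves raw dicts, per the build() logic above; the uuids, data row id, and feature names are illustrative only:

# Both payloads carry the `answer` determinant. build() then disambiguates:
# a string answer selects NDText, a dict answer selects NDRadio (see above).
text_ann = NDClassification(
    answer="a value",
    uuid="ee70fd88-9f88-48dd-b760-7469ff479b71",
    dataRow={"id": "ck1s02fqxm8fi0757f0e6qtdc"},
    name="free_text_question",  # hypothetical feature name
)
assert isinstance(text_ann, NDText)

radio_ann = NDClassification(
    answer={"name": "first_answer"},  # dict answer -> NDRadio
    uuid="f6879f59-d2b5-49c2-aceb-d9e8dc478673",
    dataRow={"id": "ck1s02fqxm8fi0757f0e6qtdc"},
    name="radio_question",  # hypothetical feature name
)
assert isinstance(radio_ann, NDRadio)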
- - -###### Tools ###### - - -class NDBaseTool(NDBase): - classifications: List[NDClassification] = [] - - # This is indepdent of our problem - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDBaseTool, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - for classification in self.classifications: - classification.validate_feature_schemas( - valid_feature_schemas_by_name[self.name][ - "classificationsBySchemaId" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsBySchemaId" - ], - valid_feature_schemas_by_name[self.name][ - "classificationsByName" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsByName" - ], - ) - - @field_validator("classifications", mode="before") - def validate_subclasses(cls, value, field): - # Create uuid and datarow id so we don't have to define classification objects twice - # This is caused by the fact that we require these ids for top level classifications but not for subclasses - results = [] - dummy_id = "child".center(25, "_") - for row in value: - results.append( - {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())} - ) - return results - - -class NDPolygon(NDBaseTool): - ontology_type: Literal["polygon"] = "polygon" - polygon: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("polygon") - def is_geom_valid(cls, v): - if len(v) < 3: - raise ValueError( - f"A polygon must have at least 3 points to be valid. Found {v}" - ) - return v - - -class NDPolyline(NDBaseTool): - ontology_type: Literal["line"] = "line" - line: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("line") - def is_geom_valid(cls, v): - if len(v) < 2: - raise ValueError( - f"A line must have at least 2 points to be valid. Found {v}" - ) - return v - - -class NDRectangle(NDBaseTool): - ontology_type: Literal["rectangle"] = "rectangle" - bbox: Bbox = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class NDPoint(NDBaseTool): - ontology_type: Literal["point"] = "point" - point: Point = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class EntityLocation(BaseModel): - start: int - end: int - - -class NDTextEntity(NDBaseTool): - ontology_type: Literal["named-entity"] = "named-entity" - location: EntityLocation = Field(json_schema_extra={"determinant": True}) - - @field_validator("location") - def is_valid_location(cls, v): - if isinstance(v, BaseModel): - v = v.model_dump() - - if len(v) < 2: - raise ValueError( - f"A line must have at least 2 points to be valid. Found {v}" - ) - if v["start"] < 0: - raise ValueError(f"Text location must be positive. Found {v}") - if v["start"] > v["end"]: - raise ValueError( - f"Text start location must be less or equal than end. Found {v}" - ) - return v - - -class RLEMaskFeatures(BaseModel): - counts: List[int] - size: List[int] - - @field_validator("counts") - def validate_counts(cls, counts): - if not all([count >= 0 for count in counts]): - raise ValueError( - "Found negative value for counts. They should all be zero or positive" - ) - return counts - - @field_validator("size") - def validate_size(cls, size): - if len(size) != 2: - raise ValueError( - f"Mask `size` should have two ints representing height and with. 
Found : {size}" - ) - if not all([count > 0 for count in size]): - raise ValueError( - f"Mask `size` should be a postitive int. Found : {size}" - ) - return size - - -class PNGMaskFeatures(BaseModel): - # base64 encoded png bytes - png: str - - -class URIMaskFeatures(BaseModel): - instanceURI: str - colorRGB: Union[List[int], Tuple[int, int, int]] - - @field_validator("colorRGB") - def validate_color(cls, colorRGB): - # Does the dtype matter? Can it be a float? - if not isinstance(colorRGB, (tuple, list)): - raise ValueError( - f"Received color that is not a list or tuple. Found : {colorRGB}" - ) - elif len(colorRGB) != 3: - raise ValueError( - f"Must provide RGB values for segmentation colors. Found : {colorRGB}" - ) - elif not all([0 <= color <= 255 for color in colorRGB]): - raise ValueError( - f"All rgb colors must be between 0 and 255. Found : {colorRGB}" - ) - return colorRGB - - -class NDMask(NDBaseTool): - ontology_type: Literal["superpixel"] = "superpixel" - mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field( - json_schema_extra={"determinant": True} - ) - - -# A union with custom construction logic to improve error messages -class NDTool( - SpecialUnion, - Type[ # type: ignore - Union[ - NDMask, - NDTextEntity, - NDPoint, - NDRectangle, - NDPolyline, - NDPolygon, - ] - ], -): ... - - -class NDAnnotation( - SpecialUnion, - Type[Union[NDTool, NDClassification]], # type: ignore -): - @classmethod - def build(cls: Any, data) -> "NDBase": - if not isinstance(data, dict): - raise ValueError("value must be dict") - errors = [] - for cl in cls.get_union_types(): - try: - return cl(**data) - except KeyError as e: - errors.append(f"{cl.__name__}: {e}") - - raise ValueError( - "Unable to construct any annotation.\n{}".format("\n".join(errors)) - ) - - @classmethod - def schema(cls): - data = {"definitions": {}} - for type_ in cls.get_union_types(): - schema_ = type_.schema() - data["definitions"].update(schema_.pop("definitions")) - data[type_.__name__] = schema_ - return data diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py index 6f8aebc58..dfc87c8a4 100644 --- a/libs/labelbox/src/labelbox/schema/enums.py +++ b/libs/labelbox/src/labelbox/schema/enums.py @@ -1,31 +1,6 @@ from enum import Enum -class BulkImportRequestState(Enum): - """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). - - If you are not usinig MEA continue using BulkImportRequest. - AnnotationImports are in beta and will change soon. - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - State - - Description - * - RUNNING - - Indicates that the import job is not done yet. - * - FAILED - - Indicates the import job failed. Check `BulkImportRequest.errors` for more information - * - FINISHED - - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information - """ - - RUNNING = "RUNNING" - FAILED = "FAILED" - FINISHED = "FINISHED" - - class AnnotationImportState(Enum): """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). 
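With BulkImportRequestState gone, AnnotationImportState is the only import-state enum left, which matches the deprecation note in the deleted module. A rough sketch of the surviving flow through LabelImport; the project id, job name, and payload are placeholders, and the call signature is assumed from the SDK's annotation-import API rather than from this patch:

from labelbox import Client
from labelbox.schema.annotation_import import LabelImport
from labelbox.schema.enums import AnnotationImportState

client = Client()  # assumes LABELBOX_API_KEY is set in the environment
predictions = [...]  # placeholder: ndjson-style dicts built for your ontology
upload = LabelImport.create_from_objects(
    client=client,
    project_id="<project-id>",
    name="my-label-import",  # placeholder job name
    labels=predictions,
)
upload.wait_until_done()
assert upload.state == AnnotationImportState.FINISHED
print(upload.errors)  # empty when every annotation imported cleanly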
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f8876f7c4..f2de4db5e 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -5,36 +5,29 @@ import warnings from collections import namedtuple from datetime import datetime, timezone -from pathlib import Path from typing import ( TYPE_CHECKING, Any, Dict, - Iterable, List, Optional, Tuple, - TypeVar, Union, overload, ) -from urllib.parse import urlparse from labelbox.schema.labeling_service import ( LabelingService, LabelingServiceStatus, ) from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -import requests -from labelbox import parser from labelbox import utils from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, - ResourceConflict, ResourceNotFoundError, ) from labelbox.orm import query @@ -46,7 +39,6 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, - validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -63,7 +55,6 @@ from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, - OntologyKind, UploadType, ) from labelbox.schema.project_overview import ( @@ -72,7 +63,7 @@ ) if TYPE_CHECKING: - from labelbox import BulkImportRequest + pass DataRowPriority = int @@ -579,7 +570,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - f"This function has only been tested to work with the Editor front end. Found %s", + "This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -814,7 +805,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - f"Batch exceeds max size, break into smaller batches" + "Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1088,8 +1079,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Batch was not created successfully: " - + json.dumps(task.errors) + "Batch was not created successfully: " + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) @@ -1436,7 +1426,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Priority was not updated successfully: " + "Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1488,33 +1478,6 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] - def bulk_import_requests(self) -> PaginatedCollection: - """Returns bulk import request objects which are used in model-assisted labeling. - These are returned with the oldest first, and most recent last. - """ - - id_param = "project_id" - query_str = """query ListAllImportRequestsPyApi($%s: ID!) 
{ - bulkImportRequests ( - where: { projectId: $%s } - skip: %%d - first: %%d - ) { - %s - } - }""" % ( - id_param, - id_param, - query.results_query_part(Entity.BulkImportRequest), - ) - return PaginatedCollection( - self.client, - query_str, - {id_param: str(self.uid)}, - ["bulkImportRequests"], - Entity.BulkImportRequest, - ) - def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1629,7 +1592,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Data rows were not moved successfully: " + "Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1639,77 +1602,6 @@ def _wait_for_task(self, task_id: str) -> Task: return task - def upload_annotations( - self, - name: str, - annotations: Union[str, Path, Iterable[Dict]], - validate: bool = False, - ) -> "BulkImportRequest": # type: ignore - """Uploads annotations to a new Editor project. - - Args: - name (str): name of the BulkImportRequest job - annotations (str or Path or Iterable): - url that is publicly accessible by Labelbox containing an - ndjson file - OR local path to an ndjson file - OR iterable of annotation rows - validate (bool): - Whether or not to validate the payload before uploading. - Returns: - BulkImportRequest - """ - - if isinstance(annotations, str) or isinstance(annotations, Path): - - def _is_url_valid(url: Union[str, Path]) -> bool: - """Verifies that the given string is a valid url. - - Args: - url: string to be checked - Returns: - True if the given url is valid otherwise False - - """ - if isinstance(url, Path): - return False - parsed = urlparse(url) - return bool(parsed.scheme) and bool(parsed.netloc) - - if _is_url_valid(annotations): - return Entity.BulkImportRequest.create_from_url( - client=self.client, - project_id=self.uid, - name=name, - url=str(annotations), - validate=validate, - ) - else: - path = Path(annotations) - if not path.exists(): - raise FileNotFoundError( - f"{annotations} is not a valid url nor existing local file" - ) - return Entity.BulkImportRequest.create_from_local_file( - client=self.client, - project_id=self.uid, - name=name, - file=path, - validate_file=validate, - ) - elif isinstance(annotations, Iterable): - return Entity.BulkImportRequest.create_from_objects( - client=self.client, - project_id=self.uid, - name=name, - predictions=annotations, # type: ignore - validate=validate, - ) - else: - raise ValueError( - f"Invalid annotations given of type: {type(annotations)}" - ) - def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py deleted file mode 100644 index 9abae1422..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ /dev/null @@ -1,258 +0,0 @@ -from unittest.mock import patch -import uuid -from labelbox import parser, Project -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -import pytest -import random -from labelbox.data.annotation_types.annotation import ObjectAnnotation -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - ClassificationAnnotation, - ClassificationAnswer, - Radio, -) -from labelbox.data.annotation_types.data.video import VideoData -from 
labelbox.data.annotation_types.geometry.point import Point -from labelbox.data.annotation_types.geometry.rectangle import ( - Rectangle, - RectangleUnit, -) -from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.data.text import TextData -from labelbox.data.annotation_types.ner import ( - DocumentEntity, - DocumentTextSelection, -) -from labelbox.data.annotation_types.video import VideoObjectAnnotation - -from labelbox.data.serialization import NDJsonConverter -from labelbox.exceptions import MALValidationError, UuidError -from labelbox.schema.bulk_import_request import BulkImportRequest -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.annotation_import import LabelImport, MALPredictionImport -from labelbox.schema.media_type import MediaType - -""" -- Here we only want to check that the uploads are calling the validation -- Then with unit tests we can check the types of errors raised -""" -# TODO: remove library once bulk import requests are removed - - -@pytest.mark.order(1) -def test_create_from_url(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_file(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - with pytest.raises(MALValidationError): - module_project.upload_annotations( - name=name, annotations=url, validate=True - ) - # Schema ids shouldn't match - - -def test_create_from_objects( - module_project: Project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_create_from_label_objects( - module_project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - labels = list(NDJsonConverter.deserialize(predictions)) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=labels - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - normalized_predictions = list(NDJsonConverter.serialize(labels)) - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, normalized_predictions - ) - - -def test_create_from_local_file( - tmp_path, predictions, module_project, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - file_name = f"{name}.ndjson" - file_path = tmp_path / 
file_name - with file_path.open("w") as f: - parser.dump(predictions, f) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=str(file_path), validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_get(client, module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - bulk_import_request = BulkImportRequest.from_name( - client, project_id=module_project.uid, name=name - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_ndjson(tmp_path, module_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - -def test_validate_ndjson_uuid(tmp_path, module_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - uid = str(uuid.uuid4()) - repeat_uuid[0]["uuid"] = uid - repeat_uuid[1]["uuid"] = uid - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - -@pytest.mark.skip( - "Slow test and uses a deprecated api endpoint for annotation imports" -) -def test_wait_till_done(rectangle_inference, project): - name = str(uuid.uuid4()) - url = project.client.upload_data( - content=parser.dumps(rectangle_inference), sign=True - ) - bulk_import_request = project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert len(bulk_import_request.inputs) == 1 - bulk_import_request.wait_until_done() - assert bulk_import_request.state == BulkImportRequestState.FINISHED - - # Check that the status files are being returned as expected - assert len(bulk_import_request.errors) == 0 - assert len(bulk_import_request.inputs) == 1 - assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] - assert len(bulk_import_request.statuses) == 1 - assert bulk_import_request.statuses[0]["status"] == "SUCCESS" - assert ( - bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] - ) - - -def test_project_bulk_import_requests(module_project, predictions): - result = module_project.bulk_import_requests() - assert len(list(result)) == 0 - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - 
name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - result = module_project.bulk_import_requests() - assert len(list(result)) == 3 - - -def test_delete(module_project, predictions): - name = str(uuid.uuid4()) - - bulk_import_requests = module_project.bulk_import_requests() - [ - bulk_import_request.delete() - for bulk_import_request in bulk_import_requests - ] - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 1 - - bulk_import_request.delete() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index a0df559fc..9e8963a26 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,24 +1,8 @@ -from labelbox.schema.media_type import MediaType -from labelbox.schema.project import Project import pytest - -from labelbox import parser -from pytest_cases import parametrize, fixture_ref +from pytest_cases import fixture_ref, parametrize from labelbox.exceptions import MALValidationError -from labelbox.schema.bulk_import_request import ( - NDChecklist, - NDClassification, - NDMask, - NDPolygon, - NDPolyline, - NDRadio, - NDRectangle, - NDText, - NDTextEntity, - NDTool, - _validate_ndjson, -) +from labelbox.schema.media_type import MediaType """ - These NDlabels are apart of bulkImportReqeust and should be removed once bulk import request is removed @@ -191,39 +175,6 @@ def test_missing_feature_schema(module_project, rectangle_inference): _validate_ndjson([pred], module_project) -def test_validate_ndjson(tmp_path, configured_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - configured_project.upload_annotations( - name="name", annotations=str(file_path), validate=True - ) - - -def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - repeat_uuid[0]["uuid"] = "test_uuid" - repeat_uuid[1]["uuid"] = "test_uuid" - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - @pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) def test_video_upload(video_checklist_inference, configured_project): pred = video_checklist_inference[0].copy() diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json deleted file mode 100644 index 4de15e217..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json +++ /dev/null @@ -1,54 
+0,0 @@ -[ - { - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "confidence": 0.8, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" - }, - { - "answer": [ - { - "schemaId": "ckrb1sfl8099e0y919v260awv", - "confidence": 0.82, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } - ], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json deleted file mode 100644 index 83a95e5bf..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json +++ /dev/null @@ -1,25 +0,0 @@ -[{ - "location": { - "start": 67, - "end": 128 - }, - "messageId": "some-message-id", - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] -}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 91563b8ae..75fe36e44 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,16 +8,17 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - } + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + }, + "classifications": [] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -28,20 +29,17 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + }, + "classifications": [] }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -50,762 +48,39 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 
935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 
1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 + "x": 10.0, + "y": 20.0 }, { - "x": 1119, - "y": 934 + "x": 15.0, + "y": 20.0 }, { - "x": 1118, - "y": 935 + "x": 20.0, + "y": 25.0 }, { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json deleted file mode 100644 index 591e40cf6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json +++ /dev/null @@ -1,823 +0,0 @@ -[ - { - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "schemaId": "ckrazcueb16og0z6609jj7y3y", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.851, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - { - "uuid": 
"751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "schemaId": "ckrazcuec16ok0z66f956apb7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.834, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } - }, - { - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "schemaId": "ckrazcuec16oi0z66dzrd8pfl", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.986, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "polygon": [ - { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - 
"x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 - } - ] - }, - { - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "schemaId": 
"ckrazcuec16om0z66bhhh4tp7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "point": { - "x": 2122, - "y": 1457 - } - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 82be4cdab..466a03594 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,826 +1,86 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "box a", + "name": "ckrazcueb16og0z6609jj7y3y", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "confidence": 0.854, + "classifications": [], + "confidence": 0.851, "customMetrics": [ { "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.7 + "value": 0.4 } - ] + ], + "bbox": { + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "mask a", + "name": "ckrazcuec16ok0z66f956apb7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - }, - "confidence": 0.685, + "classifications": [], + "confidence": 0.834, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 - }, - { - "name": "customMetric2", - "value": 0.9 + "value": 0.3 } - ] + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + } }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "polygon a", + "name": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.71, + "confidence": 0.986, "customMetrics": [ { "name": "customMetric1", - "value": 0.1 + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 
929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 + "x": 10.0, + "y": 20.0 }, { - "x": 1099, - "y": 911 + "x": 15.0, + "y": 20.0 }, { - "x": 1100, - "y": 911 + "x": 20.0, + "y": 25.0 }, { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, 
- "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "point a", + "name": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.77, - "customMetrics": [ - { - "name": "customMetric2", - "value": 1.2 - } - ], "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json deleted file mode 100644 index 31be5a4c7..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "aggregation": "ARITHMETIC_MEAN", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "metricValue": 0.1 - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json deleted file mode 100644 index f4b4894f6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json +++ /dev/null @@ -1,155 +0,0 @@ -[{ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 4, - "unit": "POINTS", - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 162.73, - "left": 32.45, - "height": 388.16999999999996, - "width": 101.66000000000001 - } -}, { - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "bbox": { - "top": 223.26, - "left": 251.42, - "height": 457.03999999999996, - "width": 186.78 - } -}, { - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", 
- "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 6, - "unit": "POINTS", - "confidence": 0.99, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 32.52, - "left": 218.17, - "height": 231.73, - "width": 110.56000000000003 - } -}, { - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "confidence": 0.89, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 117.39, - "left": 4.25, - "height": 456.9200000000001, - "width": 164.83 - } -}, { - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 8, - "unit": "POINTS", - "bbox": { - "top": 82.13, - "left": 217.28, - "height": 279.76, - "width": 82.43000000000004 - } -}, { - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 3, - "unit": "POINTS", - "bbox": { - "top": 298.12, - "left": 83.34, - "height": 203.83000000000004, - "width": 0.37999999999999545 - } -}, -{ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "named_entity", - "classifications": [], - "textSelections": [ - { - "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - "tokenIds": [ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c" - ], - "page": 1 - } - ] -} -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json deleted file mode 100644 index d6a9eecbd..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json +++ /dev/null @@ -1,36 +0,0 @@ -[ - { - "line": [ - { - "x": 2534.353, - "y": 249.471 - }, - { - "x": 2429.492, - "y": 182.092 - }, - { - "x": 2294.322, - "y": 221.962 - } - ], - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-line", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.58, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json deleted file mode 100644 index 1f26d8dc8..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "location": { - "start": 67, - "end": 128 - }, - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": 
"05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json deleted file mode 100644 index 11e0753d9..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json +++ /dev/null @@ -1,166 +0,0 @@ -[{ - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" - }, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{ - "start": 30, - "end": 35 - }, { - "start": 50, - "end": 51 - }] -}, { - "answer": [{ - "schemaId": "ckrb1sfl8099e0y919v260awv" - }], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{ - "start": 0, - "end": 5 - }] -}, { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" -}, { - "classifications": [], - "schemaId": - "cl5islwg200gfci6g0oitaypu", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [{ - "keyframes": [{ - "frame": 1, - "line": [{ - "x": 10.0, - "y": 10.0 - }, { - "x": 100.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }, { - "frame": 5, - "line": [{ - "x": 15.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 8, - "line": [{ - "x": 100.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [{ - "keyframes": [{ - "frame": 1, - "point": { - "x": 10.0, - "y": 10.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 5, - "point": { - "x": 50.0, - "y": 50.0 - }, - "classifications": [] - }, { - "frame": 10, - "point": { - "x": 10.0, - "y": 50.0 - }, - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5iw0roz00lwci6g5jni62vs", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [{ - "keyframes": [{ - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0 - }, - "classifications": [] - }, { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0 - }, - "classifications": [] - }] - }] -}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 0bc3c8924..59f568c75 100644 --- 
a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py
@@ -37,13 +37,6 @@ def test_serialization_min():
     res.pop("uuid")
     assert res == expected
 
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    for i, annotation in enumerate(res.annotations):
-        annotation.extra.pop("uuid")
-        assert annotation.value == label.annotations[i].value
-        assert annotation.name == label.annotations[i].name
-
 
 def test_serialization_with_classification():
     label = Label(
@@ -134,12 +127,6 @@ def test_serialization_with_classification():
     res.pop("uuid")
     assert res == expected
 
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    assert label.model_dump(exclude_none=True) == label.model_dump(
-        exclude_none=True
-    )
-
 
 def test_serialization_with_classification_double_nested():
     label = Label(
@@ -233,13 +220,6 @@ def test_serialization_with_classification_double_nested():
     res.pop("uuid")
     assert res == expected
 
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    res.annotations[0].extra.pop("uuid")
-    assert label.model_dump(exclude_none=True) == label.model_dump(
-        exclude_none=True
-    )
-
 
 def test_serialization_with_classification_double_nested_2():
     label = Label(
@@ -330,9 +310,3 @@ def test_serialization_with_classification_double_nested_2():
     res = next(serialized)
     res.pop("uuid")
     assert res == expected
-
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    assert label.model_dump(exclude_none=True) == label.model_dump(
-        exclude_none=True
-    )
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
index 8dcb17f0b..82adce99c 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
@@ -1,15 +1,73 @@
 import json
 
+from labelbox.data.annotation_types.classification.classification import (
+    Checklist,
+    Radio,
+    Text,
+)
+from labelbox.data.annotation_types.data.generic_data_row_data import (
+    GenericDataRowData,
+)
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
+from labelbox.types import (
+    Label,
+    ClassificationAnnotation,
+    ClassificationAnswer,
+)
+from labelbox.data.mixins import CustomMetric
+
 
 def test_classification():
     with open(
         "tests/data/assets/ndjson/classification_import.json", "r"
     ) as file:
         data = json.load(file)
-    res = list(NDJsonConverter.deserialize(data))
-    res = list(NDJsonConverter.serialize(res))
+
+    label = Label(
+        data=GenericDataRowData(
+            uid="ckrb1sf1i1g7i0ybcdc6oc8ct",
+        ),
+        annotations=[
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfjx099a0y914hl319ie",
+                extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"},
+                value=Radio(
+                    answer=ClassificationAnswer(
+                        custom_metrics=[
+                            CustomMetric(name="customMetric1", value=0.5),
+                            CustomMetric(name="customMetric2", value=0.3),
+                        ],
+                        confidence=0.8,
+                        feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb",
+                    ),
+                ),
+            ),
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
+                extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"},
+                value=Checklist(
+                    answer=[
+                        ClassificationAnswer(
+                            custom_metrics=[
+                                CustomMetric(name="customMetric1", value=0.5),
+                                CustomMetric(name="customMetric2", value=0.3),
+                            ],
+                            confidence=0.82,
+                            feature_schema_id="ckrb1sfl8099e0y919v260awv",
+                        )
+                    ],
+                ),
+            ),
+
ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data @@ -18,6 +76,48 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + label = Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + name="classification a", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="choice 1", + ), + ), + ), + ClassificationAnnotation( + name="classification b", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.945, + name="choice 2", + ) + ], + ), + ), + ClassificationAnnotation( + name="classification c", + extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index f7da9181b..561f9ce86 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,8 +1,12 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -99,25 +103,62 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - deserialized_label = list(NDJsonConverter().deserialize(ndjson)) - deserialized_label[0].annotations[0].extra.pop("uuid") - assert deserialized_label[0].model_dump(exclude_none=True) == label[ - 0 - ].model_dump(exclude_none=True) +def test_conversation_entity_import(): + with open( + "tests/data/assets/ndjson/conversation_entity_import.json", "r" + ) as file: + data = json.load(file) -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/conversation_entity_import.json", + label = lb_types.Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) + assert res == data + + +def test_conversation_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - ], -) 
-def test_conversation_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + label = lb_types.Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, extra={}, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py index 333c00250..999e1bda5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py @@ -1,67 +1,29 @@ -from copy import copy -import pytest import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import ( - NDDicomSegments, - NDDicomSegment, - NDDicomLine, -) - -""" -Data gen prompt test data -""" - -prompt_text_annotation = lb_types.PromptClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - name="test", - value=lb_types.PromptText( - answer="the answer to the text questions right here" - ), -) - -prompt_text_ndjson = { - "answer": "the answer to the text questions right here", - "name": "test", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, -} - -data_gen_label = lb_types.Label( - data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - annotations=[prompt_text_annotation], -) - -""" -Prompt annotation test -""" def test_serialize_label(): - serialized_label = next(NDJsonConverter().serialize([data_gen_label])) - # Remove uuid field since this is a random value that can not be specified also meant for relationships - del serialized_label["uuid"] - assert serialized_label == prompt_text_ndjson - - -def test_deserialize_label(): - deserialized_label = next( - NDJsonConverter().deserialize([prompt_text_ndjson]) + prompt_text_annotation = lb_types.PromptClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + name="test", + extra={"uuid": "test"}, + value=lb_types.PromptText( + answer="the answer to the text questions right here" + ), ) - if hasattr(deserialized_label.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to match - deserialized_label.annotations[0].extra = {} - assert deserialized_label.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + prompt_text_ndjson = { + "answer": "the answer to the text questions right here", + "name": "test", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "test", + } + + data_gen_label = lb_types.Label( + data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + annotations=[prompt_text_annotation], + ) + serialized_label = next(NDJsonConverter().serialize([data_gen_label])) -def test_serialize_deserialize_label(): - serialized = list(NDJsonConverter.serialize([data_gen_label])) - deserialized = next(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to 
match - deserialized.annotations[0].extra = {} - assert deserialized.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + assert serialized_label == prompt_text_ndjson diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 633214367..762891aa2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,6 +1,5 @@ from copy import copy import pytest -import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -181,28 +180,3 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson - - -@pytest.mark.parametrize("label, ndjson", labels_ndjsons) -def test_deserialize_label(label, ndjson): - deserialized_label = next(NDJsonConverter().deserialize([ndjson])) - if hasattr(deserialized_label.annotations[0], "extra"): - deserialized_label.annotations[0].extra = {} - for i, annotation in enumerate(deserialized_label.annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value - - -@pytest.mark.parametrize("label", labels) -def test_serialize_deserialize_label(label): - serialized = list(NDJsonConverter.serialize([label])) - deserialized = list(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized[0].annotations[0], "extra"): - deserialized[0].annotations[0].extra = {} - for i, annotation in enumerate(deserialized[0].annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index 5fe6a9789..a0897ad9f 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,6 +1,19 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + RectangleUnit, + Point, + DocumentRectangle, + DocumentEntity, + DocumentTextSelection, +) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -53,10 +66,144 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + 
unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.89, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_with_name_only(): @@ -65,26 +212,135 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id=None, + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ 
+ "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.74, + name="boxy", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson - - -def test_pdf_bbox_deserialize(): - deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) - deserialized[0].annotations[0].extra = {} - assert ( - deserialized[0].annotations[0].value - == bbox_labels[0].annotations[0].value - ) - assert ( - deserialized[0].annotations[0].name - == bbox_labels[0].annotations[0].name - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 4adcd9935..1ab678cde 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,8 +9,6 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=VideoData( uid="cklr9mr4m5iao0rb6cvxu4qbn", - file_path=None, - frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -22,6 +20,7 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", + "uuid": 
"b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, value=Rectangle( extra={}, @@ -588,31 +587,4 @@ def test_serialize_video_objects(): serialized_labels = NDJsonConverter.serialize([label]) label = next(serialized_labels) - manual_label = video_serialized_bbox_label() - - for key in label.keys(): - # ignore uuid because we randomize if there was none - if key != "uuid": - assert label[key] == manual_label[key] - - assert len(label["segments"]) == 2 - assert len(label["segments"][0]["keyframes"]) == 2 - assert len(label["segments"][1]["keyframes"]) == 4 - - # #converts back only the keyframes. should be the sum of all prev segments - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - assert len(label.annotations) == 6 - - -def test_confidence_is_ignored(): - label = video_bbox_label() - serialized_labels = NDJsonConverter.serialize([label]) - label = next(serialized_labels) - label["confidence"] = 0.453 - label["segments"][0]["confidence"] = 0.453 - - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - for annotation in label.annotations: - assert annotation.confidence is None + assert label == video_serialized_bbox_label() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 84c017497..349be13a8 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -34,16 +34,6 @@ def test_serialization(): assert res["answer"] == "text_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "text_answer" - assert annotation_value.confidence == 0.5 - def test_nested_serialization(): label = Label( @@ -102,19 +92,3 @@ def test_nested_serialization(): assert sub_classification["name"] == "nested answer" assert sub_classification["answer"] == "nested answer" assert sub_classification["confidence"] == 0.7 - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - answer = annotation.value.answer[0] - assert answer.confidence == 0.9 - assert answer.name == "first_answer" - - classification_answer = answer.classifications[0].value.answer - assert classification_answer.confidence == 0.8 - assert classification_answer.name == "first_sub_radio_answer" - - sub_classification_answer = classification_answer.classifications[0].value - assert type(sub_classification_answer) is Text - assert sub_classification_answer.answer == "nested answer" - assert sub_classification_answer.confidence == 0.7 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py index 2b3fa7f8c..d104a691e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py @@ -1,73 +1,74 @@ -import json -import pytest - -from labelbox.data.serialization.ndjson.classification import NDRadio - +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import NDLine 
-
-
-def round_dict(data):
-    if isinstance(data, dict):
-        for key in data:
-            if isinstance(data[key], float):
-                data[key] = int(data[key])
-            elif isinstance(data[key], dict):
-                data[key] = round_dict(data[key])
-            elif isinstance(data[key], (list, tuple)):
-                data[key] = [round_dict(r) for r in data[key]]
+from labelbox.types import (
+    Label,
+    ClassificationAnnotation,
+    Radio,
+    ClassificationAnswer,
+)
 
-    return data
 
+def test_generic_data_row_global_key_included():
+    expected = [
+        {
+            "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"},
+            "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+            "schemaId": "ckrb1sfjx099a0y914hl319ie",
+            "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673",
+        }
+    ]
 
-@pytest.mark.parametrize(
-    "filename",
-    [
-        "tests/data/assets/ndjson/classification_import_global_key.json",
-        "tests/data/assets/ndjson/metric_import_global_key.json",
-        "tests/data/assets/ndjson/polyline_import_global_key.json",
-        "tests/data/assets/ndjson/text_entity_import_global_key.json",
-        "tests/data/assets/ndjson/conversation_entity_import_global_key.json",
-    ],
-)
-def test_many_types(filename: str):
-    with open(filename, "r") as f:
-        data = json.load(f)
-        res = list(NDJsonConverter.deserialize(data))
-        res = list(NDJsonConverter.serialize(res))
-        assert res == data
-    f.close()
+    label = Label(
+        data=GenericDataRowData(
+            global_key="ckrb1sf1i1g7i0ybcdc6oc8ct",
+        ),
+        annotations=[
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfjx099a0y914hl319ie",
+                extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"},
+                value=Radio(
+                    answer=ClassificationAnswer(
+                        feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb",
+                    ),
+                ),
+            )
+        ],
+    )
 
+    res = list(NDJsonConverter.serialize([label]))
 
-def test_image():
-    with open(
-        "tests/data/assets/ndjson/image_import_global_key.json", "r"
-    ) as f:
-        data = json.load(f)
-        res = list(NDJsonConverter.deserialize(data))
-        res = list(NDJsonConverter.serialize(res))
-        for r in res:
-            r.pop("classifications", None)
-        assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
-    f.close()
+    assert res == expected
 
-def test_pdf():
-    with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f:
-        data = json.load(f)
-        res = list(NDJsonConverter.deserialize(data))
-        res = list(NDJsonConverter.serialize(res))
-        assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
-    f.close()
+def test_dict_data_row_global_key_included():
+    expected = [
+        {
+            "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"},
+            "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+            "schemaId": "ckrb1sfjx099a0y914hl319ie",
+            "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673",
+        }
+    ]
 
+    label = Label(
+        data={
+            "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct",
+        },
+        annotations=[
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfjx099a0y914hl319ie",
+                extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"},
+                value=Radio(
+                    answer=ClassificationAnswer(
+                        feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb",
+                    ),
+                ),
+            )
+        ],
+    )
 
-def test_video():
-    with open(
-        "tests/data/assets/ndjson/video_import_global_key.json", "r"
-    ) as f:
-        data = json.load(f)
+    res = list(NDJsonConverter.serialize([label]))
 
-    res = list(NDJsonConverter.deserialize(data))
-    res = list(NDJsonConverter.serialize(res))
-    assert res == [data[2], data[0], data[1], data[3], data[4], data[5]]
-    f.close()
+    assert res == expected
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py
index 1729e1f46..d67acb9c3 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,4 +1,8 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -10,6 +14,7 @@ ImageData, MaskData, ) +from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -29,12 +34,74 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_image_with_name_only(): @@ -43,11 +110,74 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + name="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + name="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + 
url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + name="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + name="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_mask(): @@ -57,10 +187,11 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], + "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -68,16 +199,54 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [255, 0, 0], + "colorRGB": (255, 0, 0), }, + "classifications": [], }, ] - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) + mask_numpy = mask_numpy.astype(np.uint8) + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.8, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Mask( + mask=MaskData(arr=mask_numpy), + color=(1, 1, 1), + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + extra={}, + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=(255, 0, 0), + ), + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + + assert res == data def test_mask_from_arr(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 45c5c67bf..40e098405 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,38 +1,166 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from 
labelbox.data.annotation_types.metrics.confusion_matrix import ( + ConfusionMatrixMetric, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ScalarMetric, + ScalarMetricAggregation, + ConfusionMatrixAggregation, +) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert reserialized == data + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + assert res == data def test_custom_scalar_metric(): - with open( - "tests/data/assets/ndjson/custom_scalar_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: 0.1, 0.2: 0.5}, + "metricName": "custom_iou", + "aggregation": "SUM", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value=0.1, + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value={"0.1": 0.1, "0.2": 0.5}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert res == data def test_custom_confusion_matrix_metric(): - with open( - "tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (1, 1, 2, 3), + "metricName": "50%_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (0, 1, 2, 5), + "metricName": "50%_iou", + "featureName": "sample_class", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 
4, 3)}, + "metricName": "50%_iou", + "aggregation": "CONFUSION_MATRIX", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ConfusionMatrixMetric( + value=(1, 1, 2, 3), + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value=(0, 1, 2, 5), + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert data == res diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 69594ff73..202f793fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,32 +1,125 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest from labelbox.data.serialization import NDJsonConverter +from labelbox.types import ( + Label, + MessageEvaluationTaskAnnotation, + MessageSingleSelectionTask, + MessageMultiSelectionTask, + MessageInfo, + OrderedMessageInfo, + MessageRankingTask, +) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - deserialized = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(deserialized)) + labels = [ + Label( + data=GenericDataRowData( + uid="cnjencjencjfencvj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="single-selection", + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + value=MessageSingleSelectionTask( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + parent_message_id="clxfznjb800073b6v43ppx9ca", + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cfcerfvergerfefj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="multi-selection", + extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, + value=MessageMultiSelectionTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + selected_messages=[ + MessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + ) + ], + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=2, + ), + ], + ), + ) + ], + ), + ] - assert data == reserialized + res = 
list(NDJsonConverter.serialize(labels)) + assert res == data -def test_mesage_ranking_task_wrong_order_serialization(): - with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: - data = json.load(file) - - some_ranking_task = next( - task - for task in data - if task["messageEvaluationTask"]["format"] == "message-ranking" - ) - some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ - "order" - ] = 3 +def test_mesage_ranking_task_wrong_order_serialization(): with pytest.raises(ValueError): - list(NDJsonConverter.deserialize([some_ranking_task])) + ( + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={ + "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" + }, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + ], + ), + ) + ], + ), + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py deleted file mode 100644 index 790bd87b3..000000000 --- a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from labelbox.data.serialization.ndjson.label import NDLabel -from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle -import pytest - - -def test_bad_annotation_input(): - data = [{"test": 3}] - with pytest.raises(ValueError): - NDLabel(**{"annotations": data}) - - -def test_correct_annotation_input(): - with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: - data = json.load(f) - assert isinstance( - NDLabel(**{"annotations": [data[0]]}).annotations[0], - NDDocumentRectangle, - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index e0f0df0e6..3633c9cbe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,13 +1,135 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Rectangle, + Point, + ClassificationAnnotation, + Radio, + ClassificationAnswer, + Text, + Checklist, +) def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + 
name="customMetric2", value=0.3 + ), + ], + confidence=0.34, + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "5d03213e-4408-456c-9eca-cf0723202961", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.894, + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={}, + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -16,6 +138,112 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="box a", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.811, + name="first answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box b", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification b", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.815, + name="second answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification c", + value=Checklist( + answer=[ + ClassificationAnswer( + name="third answer", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), 
+ end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="a string", + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index 97d48a14e..cd11d97fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,18 +1,76 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ObjectAnnotation, Point, Line, Label -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/polyline_without_confidence_import.json", - "tests/data/assets/ndjson/polyline_import.json", - ], -) -def test_polyline_import(filename: str): - with open(filename, "r") as file: +def test_polyline_import_with_confidence(): + with open( + "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" + ) as file: + data = json.load(file) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data + + +def test_polyline_import_without_confidence(): + with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.58, + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index bd80f9267..4458e335c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,4 +1,3 @@ -import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -40,14 +39,6 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - for i, annotation in enumerate(res.annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def 
test_serialization_with_radio_classification(): label = Label( @@ -101,10 +92,3 @@ def test_serialization_with_radio_classification(): res = next(serialized) res.pop("uuid") assert res == expected - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - res.annotations[0].extra.pop("uuid") - assert res.annotations[0].model_dump( - exclude_none=True - ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 66630dbb5..0e42ab152 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,6 +1,10 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -8,8 +12,26 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="bbox", + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + }, + value=Rectangle( + start=Point(x=38.0, y=28.0), + end=Point(x=81.0, y=69.0), + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -39,8 +61,6 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, }, ) @@ -48,8 +68,9 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + + assert res == data def test_rectangle_mixed_start_end_points(): @@ -76,17 +97,13 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, - }, + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + assert res == data def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index f33719035..235b66957 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,16 +1,135 @@ import json -from uuid import uuid4 -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Point, + Rectangle, + RelationshipAnnotation, + Relationship, +) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = 
list(NDJsonConverter.deserialize(data)) + res = [ + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + extra={}, + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl-DIFFERENT", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + ] res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -44,29 +163,3 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] - - -def test_relationship_nonexistent_object(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - relationship_annotation = data[2] - source_uuid = relationship_annotation["relationship"]["source"] - target_uuid = str(uuid4()) - relationship_annotation["relationship"]["target"] = target_uuid - error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" - - with pytest.raises(ValueError, match=error_msg): - list(NDJsonConverter.deserialize(data)) - - -def test_relationship_duplicate_uuids(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - source, target = data[0], data[1] - target["uuid"] = source["uuid"] - error_msg = f"UUID '{source['uuid']}' is not unique" - - with pytest.raises(AssertionError, match=error_msg): - list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index d5e81c51a..21db389cb 100644 --- 
a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,7 +1,5 @@ from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( - ClassificationAnswer, - Radio, Text, ) from labelbox.data.annotation_types.data.text import TextData @@ -34,11 +32,3 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index 3e856f001..fb93f15d4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,21 +1,68 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, TextEntity + + +def test_text_entity_import(): + with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: + data = json.load(file) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/text_entity_import.json", +def test_text_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - ], -) -def test_text_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index c7a6535c4..4fba5c2ca 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,10 +1,10 @@ import json -from labelbox.client import Client from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, Radio, + Text, 
) from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.geometry.line import Line @@ -13,8 +13,10 @@ from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.video import VideoObjectAnnotation -from labelbox import parser +from labelbox.data.annotation_types.video import ( + VideoClassificationAnnotation, + VideoObjectAnnotation, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter from operator import itemgetter @@ -24,15 +26,275 @@ def test_video(): with open("tests/data/assets/ndjson/video_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=1, + ), + 
VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=5, + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + 
frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_name_only(): @@ -40,16 +302,274 @@ def test_video_name_only(): "tests/data/assets/ndjson/video_import_name_only.json", "r" ) as file: data = json.load(file) - - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=1, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], 
+ ), + frame=5, + ), + ClassificationAnnotation( + name="question 3", + extra={"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_classification_global_subclassifications(): @@ -67,7 +587,6 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( - name="checklist", answer=[ ClassificationAnswer( name="first_checklist_answer", @@ -94,7 +613,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = nested_checklist_annotation_ndjson = { + expected_second_annotation = { "name": "nested_checklist_question", "answer": [ { @@ -116,12 +635,6 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - assert annotation.name == label.annotations[i].name - def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -287,14 +800,6 @@ def 
test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_video_classification_point(): bbox_annotation = [ @@ -445,13 +950,6 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - def test_video_classification_frameline(): bbox_annotation = [ @@ -619,9 +1117,289 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value + +[ + { + "answer": "a value", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 5, "start": 0}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5islwg200gfci6g0oitaypu", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + { + "classifications": [], + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + } + ] + }, + ], + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + } + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + }, + { + "classifications": [], + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + }, + ] + }, + ], + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "segments": [ + { + "keyframes": [ + { + "bbox": { + "height": 100.0, + "left": 5.0, + "top": 10.0, + "width": 150.0, + }, + "classifications": [], + "frame": 1, + }, + { + "bbox": { + "height": 50.0, + "left": 5.0, + "top": 30.0, + "width": 150.0, + }, + "classifications": [], + "frame": 5, + 
}, + ] + }, + { + "keyframes": [ + { + "bbox": { + "height": 400.0, + "left": 200.0, + "top": 300.0, + "width": 150.0, + }, + "classifications": [], + "frame": 10, + } + ] + }, + ], + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + }, +] + +[ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{"start": 0, "end": 5}], + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "classifications": [], + "schemaId": "cl5islwg200gfci6g0oitaypu", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + { + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + } + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + "classifications": [], + } + ] + }, + { + "keyframes": [ + { + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + "classifications": [], + }, + { + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + "classifications": [], + }, + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0, + }, + "classifications": [], + }, + { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0, + }, + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0, + }, + "classifications": [], + } + ] + }, + ], + }, +] From 2c0c6773f8abdac0928a325d75f709892a92a13d Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 18 Sep 2024 09:22:02 -0700 Subject: [PATCH 05/35] Revert "Vb/merge 5.0.0 (#1826)" (#1827) --- libs/labelbox/src/labelbox/__init__.py | 1 + libs/labelbox/src/labelbox/orm/model.py | 1 + libs/labelbox/src/labelbox/schema/__init__.py | 21 +- .../labelbox/schema/bulk_import_request.py | 1004 +++++++++++++++++ libs/labelbox/src/labelbox/schema/enums.py | 25 + libs/labelbox/src/labelbox/schema/project.py | 120 +- .../test_bulk_import_request.py | 258 +++++ .../test_ndjson_validation.py | 53 +- .../classification_import_global_key.json | 54 + ...conversation_entity_import_global_key.json | 25 + .../data/assets/ndjson/image_import.json | 779 ++++++++++++- 
.../ndjson/image_import_global_key.json | 823 ++++++++++++++ .../assets/ndjson/image_import_name_only.json | 810 ++++++++++++- .../ndjson/metric_import_global_key.json | 10 + .../assets/ndjson/pdf_import_global_key.json | 155 +++ .../ndjson/polyline_import_global_key.json | 36 + .../ndjson/text_entity_import_global_key.json | 26 + .../ndjson/video_import_global_key.json | 166 +++ .../serialization/ndjson/test_checklist.py | 26 + .../ndjson/test_classification.py | 108 +- .../serialization/ndjson/test_conversation.py | 71 +- .../serialization/ndjson/test_data_gen.py | 80 +- .../data/serialization/ndjson/test_dicom.py | 26 + .../serialization/ndjson/test_document.py | 294 +---- .../ndjson/test_export_video_objects.py | 32 +- .../serialization/ndjson/test_free_text.py | 26 + .../serialization/ndjson/test_global_key.py | 125 +- .../data/serialization/ndjson/test_image.py | 203 +--- .../data/serialization/ndjson/test_metric.py | 170 +-- .../data/serialization/ndjson/test_mmc.py | 125 +- .../ndjson/test_ndlabel_subclass_matching.py | 19 + .../data/serialization/ndjson/test_nested.py | 236 +--- .../serialization/ndjson/test_polyline.py | 82 +- .../data/serialization/ndjson/test_radio.py | 16 + .../serialization/ndjson/test_rectangle.py | 43 +- .../serialization/ndjson/test_relationship.py | 151 +-- .../data/serialization/ndjson/test_text.py | 10 + .../serialization/ndjson/test_text_entity.py | 69 +- .../data/serialization/ndjson/test_video.py | 868 +------------- 39 files changed, 4767 insertions(+), 2380 deletions(-) create mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py create mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py create mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json create mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index f9b82b422..5b5ac1f67 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -6,6 +6,7 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig +from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( MALPredictionImport, MEAPredictionImport, diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py index 1f3ee1d86..84dcac774 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -386,6 +386,7 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] + BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: 
Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index e57c04a29..03327e0d1 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,28 +1,29 @@ -import labelbox.schema.annotation_import import labelbox.schema.asset_attachment -import labelbox.schema.batch +import labelbox.schema.bulk_import_request +import labelbox.schema.annotation_import import labelbox.schema.benchmark -import labelbox.schema.catalog import labelbox.schema.data_row -import labelbox.schema.data_row_metadata import labelbox.schema.dataset -import labelbox.schema.iam_integration -import labelbox.schema.identifiable -import labelbox.schema.identifiables import labelbox.schema.invite import labelbox.schema.label import labelbox.schema.labeling_frontend import labelbox.schema.labeling_service -import labelbox.schema.media_type import labelbox.schema.model import labelbox.schema.model_run import labelbox.schema.ontology -import labelbox.schema.ontology_kind import labelbox.schema.organization import labelbox.schema.project -import labelbox.schema.project_overview import labelbox.schema.review import labelbox.schema.role import labelbox.schema.task import labelbox.schema.user import labelbox.schema.webhook +import labelbox.schema.data_row_metadata +import labelbox.schema.batch +import labelbox.schema.iam_integration +import labelbox.schema.media_type +import labelbox.schema.identifiables +import labelbox.schema.identifiable +import labelbox.schema.catalog +import labelbox.schema.ontology_kind +import labelbox.schema.project_overview diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py new file mode 100644 index 000000000..8e11f3261 --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/bulk_import_request.py @@ -0,0 +1,1004 @@ +import json +import time +from uuid import UUID, uuid4 +import functools + +import logging +from pathlib import Path +from google.api_core import retry +from labelbox import parser +import requests +from pydantic import ( + ValidationError, + BaseModel, + Field, + field_validator, + model_validator, + ConfigDict, + StringConstraints, +) +from typing_extensions import Literal, Annotated +from typing import ( + Any, + List, + Optional, + BinaryIO, + Dict, + Iterable, + Tuple, + Union, + Type, + Set, + TYPE_CHECKING, +) + +from labelbox import exceptions as lb_exceptions +from labelbox import utils +from labelbox.orm import query +from labelbox.orm.db_object import DbObject +from labelbox.orm.model import Relationship +from labelbox.schema.enums import BulkImportRequestState +from labelbox.schema.serialization import serialize_labels +from labelbox.orm.model import Field as lb_Field + +if TYPE_CHECKING: + from labelbox import Project + from labelbox.types import Label + +NDJSON_MIME_TYPE = "application/x-ndjson" +logger = logging.getLogger(__name__) + +# TODO: Deprecate this library in place of labelimport and malprediction import library. 
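+#
+# Typical flow (illustrative sketch; the project id, name, and `ndjson_rows`
+# below are placeholders, not values from this repository):
+#
+#   request = BulkImportRequest.create_from_objects(
+#       client, project_id="<project-id>", name="import-1",
+#       predictions=ndjson_rows)
+#   request.wait_until_done()
+#   assert request.state == BulkImportRequestState.FINISHED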
+ + +def _determinants(parent_cls: Any) -> List[str]: + return [ + k + for k, v in parent_cls.model_fields.items() + if v.json_schema_extra and "determinant" in v.json_schema_extra + ] + + +def _make_file_name(project_id: str, name: str) -> str: + return f"{project_id}__{name}.ndjson" + + +# TODO(gszpak): move it to client.py +def _make_request_data( + project_id: str, name: str, content_length: int, file_name: str +) -> dict: + query_str = """mutation createBulkImportRequestFromFilePyApi( + $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) { + createBulkImportRequest(data: { + projectId: $projectId, + name: $name, + filePayload: { + file: $file, + contentLength: $contentLength + } + }) { + %s + } + } + """ % query.results_query_part(BulkImportRequest) + variables = { + "projectId": project_id, + "name": name, + "file": None, + "contentLength": content_length, + } + operations = json.dumps({"variables": variables, "query": query_str}) + + return { + "operations": operations, + "map": (None, json.dumps({file_name: ["variables.file"]})), + } + + +def _send_create_file_command( + client, + request_data: dict, + file_name: str, + file_data: Tuple[str, Union[bytes, BinaryIO], str], +) -> dict: + response = client.execute(data=request_data, files={file_name: file_data}) + + if not response.get("createBulkImportRequest", None): + raise lb_exceptions.LabelboxError( + "Failed to create BulkImportRequest, message: %s" + % response.get("errors", None) + or response.get("error", None) + ) + + return response + + +class BulkImportRequest(DbObject): + """Represents the import job when importing annotations. + + Attributes: + name (str) + state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) + input_file_url (str): URL to your web-hosted NDJSON file + error_file_url (str): NDJSON that contains error messages for failed annotations + status_file_url (str): NDJSON that contains status for each annotation + created_at (datetime): UTC timestamp for date BulkImportRequest was created + + project (Relationship): `ToOne` relationship to Project + created_by (Relationship): `ToOne` relationship to User + """ + + name = lb_Field.String("name") + state = lb_Field.Enum(BulkImportRequestState, "state") + input_file_url = lb_Field.String("input_file_url") + error_file_url = lb_Field.String("error_file_url") + status_file_url = lb_Field.String("status_file_url") + created_at = lb_Field.DateTime("created_at") + + project = Relationship.ToOne("Project") + created_by = Relationship.ToOne("User", False, "created_by") + + @property + def inputs(self) -> List[Dict[str, Any]]: + """ + Inputs for each individual annotation uploaded. + This should match the ndjson annotations that you have uploaded. + + Returns: + Uploaded ndjson. + + * This information will expire after 24 hours. + """ + return self._fetch_remote_ndjson(self.input_file_url) + + @property + def errors(self) -> List[Dict[str, Any]]: + """ + Errors for each individual annotation uploaded. This is a subset of statuses + + Returns: + List of dicts containing error messages. Empty list means there were no errors + See `BulkImportRequest.statuses` for more details. + + * This information will expire after 24 hours. + """ + self.wait_until_done() + return self._fetch_remote_ndjson(self.error_file_url) + + @property + def statuses(self) -> List[Dict[str, Any]]: + """ + Status for each individual annotation uploaded. + + Returns: + A status for each annotation if the upload is done running. 
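+            A typical status row has the shape
+            ``{"uuid": "...", "dataRow": {"id": "..."}, "status": "SUCCESS", "errors": []}``
+            (illustrative values; the fields are documented below).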
+ See below table for more details + + .. list-table:: + :widths: 15 150 + :header-rows: 1 + + * - Field + - Description + * - uuid + - Specifies the annotation for the status row. + * - dataRow + - JSON object containing the Labelbox data row ID for the annotation. + * - status + - Indicates SUCCESS or FAILURE. + * - errors + - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. + + * This information will expire after 24 hours. + """ + self.wait_until_done() + return self._fetch_remote_ndjson(self.status_file_url) + + @functools.lru_cache() + def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: + """ + Fetches the remote ndjson file and caches the results. + + Args: + url (str): Can be any url pointing to an ndjson file. + Returns: + ndjson as a list of dicts. + """ + response = requests.get(url) + response.raise_for_status() + return parser.loads(response.text) + + def refresh(self) -> None: + """Synchronizes values of all fields with the database.""" + query_str, params = query.get_single(BulkImportRequest, self.uid) + res = self.client.execute(query_str, params) + res = res[utils.camel_case(BulkImportRequest.type_name())] + self._set_field_values(res) + + def wait_till_done(self, sleep_time_seconds: int = 5) -> None: + self.wait_until_done(sleep_time_seconds) + + def wait_until_done(self, sleep_time_seconds: int = 5) -> None: + """Blocks import job until certain conditions are met. + + Blocks until the BulkImportRequest.state changes either to + `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, + periodically refreshing object's state. + + Args: + sleep_time_seconds (str): a time to block between subsequent API calls + """ + while self.state == BulkImportRequestState.RUNNING: + logger.info(f"Sleeping for {sleep_time_seconds} seconds...") + time.sleep(sleep_time_seconds) + self.__exponential_backoff_refresh() + + @retry.Retry( + predicate=retry.if_exception_type( + lb_exceptions.ApiLimitError, + lb_exceptions.TimeoutError, + lb_exceptions.NetworkError, + ) + ) + def __exponential_backoff_refresh(self) -> None: + self.refresh() + + @classmethod + def from_name( + cls, client, project_id: str, name: str + ) -> "BulkImportRequest": + """Fetches existing BulkImportRequest. + + Args: + client (Client): a Labelbox client + project_id (str): BulkImportRequest's project id + name (str): name of BulkImportRequest + Returns: + BulkImportRequest object + + """ + query_str = """query getBulkImportRequestPyApi( + $projectId: ID!, $name: String!) { + bulkImportRequest(where: { + projectId: $projectId, + name: $name + }) { + %s + } + } + """ % query.results_query_part(cls) + params = {"projectId": project_id, "name": name} + response = client.execute(query_str, params=params) + return cls(client, response["bulkImportRequest"]) + + @classmethod + def create_from_url( + cls, client, project_id: str, name: str, url: str, validate=True + ) -> "BulkImportRequest": + """ + Creates a BulkImportRequest from a publicly accessible URL + to an ndjson file with predictions. 
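+
+        Example (sketch; the project id and URL are placeholders):
+            >>> request = BulkImportRequest.create_from_url(
+            ...     client, project_id="<project-id>", name="url-import",
+            ...     url="https://example.com/predictions.ndjson")
+            >>> request.wait_until_done()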
+ + Args: + client (Client): a Labelbox client + project_id (str): id of project for which predictions will be imported + name (str): name of BulkImportRequest + url (str): publicly accessible URL pointing to ndjson file containing predictions + validate (bool): a flag indicating if there should be a validation + if `url` is valid ndjson + Returns: + BulkImportRequest object + """ + if validate: + logger.warn( + "Validation is turned on. The file will be downloaded locally and processed before uploading." + ) + res = requests.get(url) + data = parser.loads(res.text) + _validate_ndjson(data, client.get_project(project_id)) + + query_str = """mutation createBulkImportRequestPyApi( + $projectId: ID!, $name: String!, $fileUrl: String!) { + createBulkImportRequest(data: { + projectId: $projectId, + name: $name, + fileUrl: $fileUrl + }) { + %s + } + } + """ % query.results_query_part(cls) + params = {"projectId": project_id, "name": name, "fileUrl": url} + bulk_import_request_response = client.execute(query_str, params=params) + return cls( + client, bulk_import_request_response["createBulkImportRequest"] + ) + + @classmethod + def create_from_objects( + cls, + client, + project_id: str, + name: str, + predictions: Union[Iterable[Dict], Iterable["Label"]], + validate=True, + ) -> "BulkImportRequest": + """ + Creates a `BulkImportRequest` from an iterable of dictionaries. + + Conforms to JSON predictions format, e.g.: + ``{ + "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", + "schemaId": "ckappz7d700gn0zbocmqkwd9i", + "dataRow": { + "id": "ck1s02fqxm8fi0757f0e6qtdc" + }, + "bbox": { + "top": 48, + "left": 58, + "height": 865, + "width": 1512 + } + }`` + + Args: + client (Client): a Labelbox client + project_id (str): id of project for which predictions will be imported + name (str): name of BulkImportRequest + predictions (Iterable[dict]): iterable of dictionaries representing predictions + validate (bool): a flag indicating if there should be a validation + if `predictions` is valid ndjson + Returns: + BulkImportRequest object + """ + if not isinstance(predictions, list): + raise TypeError( + f"annotations must be in a form of Iterable. Found {type(predictions)}" + ) + ndjson_predictions = serialize_labels(predictions) + + if validate: + _validate_ndjson(ndjson_predictions, client.get_project(project_id)) + + data_str = parser.dumps(ndjson_predictions) + if not data_str: + raise ValueError("annotations cannot be empty") + + data = data_str.encode("utf-8") + file_name = _make_file_name(project_id, name) + request_data = _make_request_data( + project_id, name, len(data_str), file_name + ) + file_data = (file_name, data, NDJSON_MIME_TYPE) + response_data = _send_create_file_command( + client, + request_data=request_data, + file_name=file_name, + file_data=file_data, + ) + + return cls(client, response_data["createBulkImportRequest"]) + + @classmethod + def create_from_local_file( + cls, client, project_id: str, name: str, file: Path, validate_file=True + ) -> "BulkImportRequest": + """ + Creates a BulkImportRequest from a local ndjson file with predictions. 
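+
+        Example (sketch; the path below is a placeholder for any local
+        ndjson file of predictions):
+            >>> request = BulkImportRequest.create_from_local_file(
+            ...     client, project_id="<project-id>", name="local-import",
+            ...     file=Path("predictions.ndjson"))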
+
+        Args:
+            client (Client): a Labelbox client
+            project_id (str): id of project for which predictions will be imported
+            name (str): name of BulkImportRequest
+            file (Path): local ndjson file with predictions
+            validate_file (bool): whether to validate that `file` is a valid ndjson file before uploading
+        Returns:
+            BulkImportRequest object
+
+        """
+        file_name = _make_file_name(project_id, name)
+        content_length = file.stat().st_size
+        request_data = _make_request_data(
+            project_id, name, content_length, file_name
+        )
+
+        with file.open("rb") as f:
+            if validate_file:
+                reader = parser.reader(f)
+                # ensure that the underlying json load call is valid
+                # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
+                # by iterating through the file so we only store
+                # each line in memory rather than the entire file
+                try:
+                    _validate_ndjson(reader, client.get_project(project_id))
+                except ValueError:
+                    raise ValueError(f"{file} is not a valid ndjson file")
+                else:
+                    f.seek(0)
+            file_data = (file.name, f, NDJSON_MIME_TYPE)
+            response_data = _send_create_file_command(
+                client, request_data, file_name, file_data
+            )
+        return cls(client, response_data["createBulkImportRequest"])
+
+    def delete(self) -> None:
+        """Deletes the import job and also any annotations created by this import.
+
+        Returns:
+            None
+        """
+        id_param = "bulk_request_id"
+        query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) {
+            deleteBulkImportRequest(where: {id: $%s}) {
+                id
+                name
+            }
+        }""" % (id_param, id_param)
+        self.client.execute(query_str, {id_param: self.uid})
+
+
+def _validate_ndjson(
+    lines: Iterable[Dict[str, Any]], project: "Project"
+) -> None:
+    """
+    Client-side validation of an ndjson object.
+
+    Does not guarantee that an upload will succeed, for the following reasons:
+        * We are not checking the data row types, which lets the following errors slip through
+            * Missing frame indices will not cause an error for videos
+        * Uploaded annotations for the wrong data type will pass (e.g. entity on images)
+        * We are not checking bounds of an asset (e.g. frame index, image height, text location)
+
+    Args:
+        lines (Iterable[Dict[str, Any]]): An iterable of ndjson lines
+        project (Project): the project for which predictions will be imported
+
+    Raises:
+        MALValidationError: Raised for invalid NDJson
+        UuidError: Duplicate UUID in upload
+    """
+    feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas(
+        project.ontology()
+    )
+    uids: Set[str] = set()
+    for idx, line in enumerate(lines):
+        try:
+            annotation = NDAnnotation(**line)
+            annotation.validate_instance(
+                feature_schemas_by_id, feature_schemas_by_name
+            )
+            uuid = str(annotation.uuid)
+            if uuid in uids:
+                raise lb_exceptions.UuidError(
+                    f"{uuid} already used in this import job, "
+                    "must be unique for the project."
+                )
+            uids.add(uuid)
+        except (ValidationError, ValueError, TypeError, KeyError) as e:
+            raise lb_exceptions.MALValidationError(
+                f"Invalid NDJson on line {idx}"
+            ) from e
+
+
+# The rest of this file contains objects for MAL validation
+def parse_classification(tool):
+    """
+    Parses a classification from an ontology.
+    Only radio, checklist, and text are supported for MAL.
+
+    Args:
+        tool (dict)
+
+    Returns:
+        dict
+    """
+    if tool["type"] in ["radio", "checklist"]:
+        option_schema_ids = [r["featureSchemaId"] for r in tool["options"]]
+        option_names = [r["value"] for r in tool["options"]]
+        return {
+            "tool": tool["type"],
+            "featureSchemaId": tool["featureSchemaId"],
+            "name": tool["name"],
+            "options": [*option_schema_ids, *option_names],
+        }
+    elif tool["type"] == "text":
+        return {
+            "tool": tool["type"],
+            "name": tool["name"],
+            "featureSchemaId": tool["featureSchemaId"],
+        }
+
+
+def get_mal_schemas(ontology):
+    """
+    Converts a project ontology to a dict for easier lookup during ndjson validation
+
+    Args:
+        ontology (Ontology)
+    Returns:
+        Dict, Dict: Useful for looking up a tool from a given feature schema id or name
+    """
+
+    valid_feature_schemas_by_schema_id = {}
+    valid_feature_schemas_by_name = {}
+    for tool in ontology.normalized["tools"]:
+        classifications = [
+            parse_classification(classification_tool)
+            for classification_tool in tool["classifications"]
+        ]
+        classifications_by_schema_id = {
+            v["featureSchemaId"]: v for v in classifications
+        }
+        classifications_by_name = {v["name"]: v for v in classifications}
+        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = {
+            "tool": tool["tool"],
+            "classificationsBySchemaId": classifications_by_schema_id,
+            "classificationsByName": classifications_by_name,
+            "name": tool["name"],
+        }
+        valid_feature_schemas_by_name[tool["name"]] = {
+            "tool": tool["tool"],
+            "classificationsBySchemaId": classifications_by_schema_id,
+            "classificationsByName": classifications_by_name,
+            "name": tool["name"],
+        }
+    for tool in ontology.normalized["classifications"]:
+        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = (
+            parse_classification(tool)
+        )
+        valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool)
+    return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name
+
+
+class Bbox(BaseModel):
+    top: float
+    left: float
+    height: float
+    width: float
+
+
+class Point(BaseModel):
+    x: float
+    y: float
+
+
+class FrameLocation(BaseModel):
+    end: int
+    start: int
+
+
+class VideoSupported(BaseModel):
+    # Note that frames are only allowed as top level inferences for video
+    frames: Optional[List[FrameLocation]] = None
+
+
+# Base class for a special kind of union.
+class SpecialUnion:
+    def __new__(cls, **kwargs):
+        return cls.build(kwargs)
+
+    @classmethod
+    def __get_validators__(cls):
+        yield cls.build
+
+    @classmethod
+    def get_union_types(cls):
+        if not issubclass(cls, SpecialUnion):
+            raise TypeError(f"{cls} must be a subclass of SpecialUnion")
+
+        union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")]
+        if len(union_types) < 1:
+            raise TypeError(
+                f"Class {cls} should inherit from a union of objects to build"
+            )
+        if len(union_types) > 1:
+            raise TypeError(
+                f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}"
+            )
+        return union_types[0].__args__[0].__args__
+
+    @classmethod
+    def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase":
+        """
+        Checks through all objects in the union to see which matches the input data.
+        Args:
+            data (Union[dict, BaseModel]): The data for constructing one of the objects in the union
+        Raises:
+            KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion
+            ValidationError: Error while trying to construct a specific object in the union
+
+        """
+        if isinstance(data, BaseModel):
+            data = data.model_dump()
+
+        top_level_fields = []
+        max_match = 0
+        matched = None
+
+        for type_ in cls.get_union_types():
+            determinate_fields = _determinants(type_)
+            top_level_fields.append(determinate_fields)
+            matches = sum([val in determinate_fields for val in data])
+            if matches == len(determinate_fields) and matches > max_match:
+                max_match = matches
+                matched = type_
+
+        if matched is not None:
+            # These two have the exact same top level keys
+            if matched in [NDRadio, NDText]:
+                if isinstance(data["answer"], dict):
+                    matched = NDRadio
+                elif isinstance(data["answer"], str):
+                    matched = NDText
+                else:
+                    raise TypeError(
+                        f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict"
+                    )
+            return matched(**data)
+        else:
+            raise KeyError(
+                f"Invalid annotation. Must have one of the following keys: {top_level_fields}. Found {data}."
+            )
+
+    @classmethod
+    def schema(cls):
+        results = {"definitions": {}}
+        for cl in cls.get_union_types():
+            schema = cl.schema()
+            results["definitions"].update(schema.pop("definitions"))
+            results[cl.__name__] = schema
+        return results
+
+
+class DataRow(BaseModel):
+    id: str
+
+
+class NDFeatureSchema(BaseModel):
+    schemaId: Optional[str] = None
+    name: Optional[str] = None
+
+    @model_validator(mode="after")
+    def must_set_one(self):
+        if self.schemaId is None and self.name is None:
+            raise ValueError(
+                "Must set either schemaId or name for all feature schemas"
+            )
+        return self
+
+
+class NDBase(NDFeatureSchema):
+    ontology_type: str
+    uuid: UUID
+    dataRow: DataRow
+    model_config = ConfigDict(extra="forbid")
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        if self.name:
+            if self.name not in valid_feature_schemas_by_name:
+                raise ValueError(
+                    f"Name {self.name} is not valid for the provided project's ontology."
+                )
+
+            if (
+                self.ontology_type
+                != valid_feature_schemas_by_name[self.name]["tool"]
+            ):
+                raise ValueError(
+                    f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}"
+                )
+
+        if self.schemaId:
+            if self.schemaId not in valid_feature_schemas_by_id:
+                raise ValueError(
+                    f"Schema id {self.schemaId} is not valid for the provided project's ontology."
+                )
+
+            if (
+                self.ontology_type
+                != valid_feature_schemas_by_id[self.schemaId]["tool"]
+            ):
+                raise ValueError(
+                    f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}"
+                )
+
+    def validate_instance(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        self.validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+
+
+###### Classifications ######
+
+
+class NDText(NDBase):
+    ontology_type: Literal["text"] = "text"
+    answer: str = Field(json_schema_extra={"determinant": True})
+    # No feature schema to check
+
+
+class NDChecklist(VideoSupported, NDBase):
+    ontology_type: Literal["checklist"] = "checklist"
+    answers: List[NDFeatureSchema] = Field(
+        json_schema_extra={"determinant": True}
+    )
+
+    @field_validator("answers", mode="before")
+    def validate_answers(cls, value, field):
+        # constr not working with mypy.
+        if not len(value):
+            raise ValueError("Checklist answers should not be empty")
+        return value
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        # Test top level feature schema for this tool
+        super(NDChecklist, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        # Test the feature schemas provided to the answer field
+        if len(
+            set([answer.name or answer.schemaId for answer in self.answers])
+        ) != len(self.answers):
+            raise ValueError(
+                f"Duplicated featureSchema found for checklist {self.uuid}"
+            )
+        for answer in self.answers:
+            options = (
+                valid_feature_schemas_by_name[self.name]["options"]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId]["options"]
+            )
+            if answer.name not in options and answer.schemaId not in options:
+                raise ValueError(
+                    f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {answer}"
+                )
+
+
+class NDRadio(VideoSupported, NDBase):
+    ontology_type: Literal["radio"] = "radio"
+    answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True})
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        super(NDRadio, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        options = (
+            valid_feature_schemas_by_name[self.name]["options"]
+            if self.name
+            else valid_feature_schemas_by_id[self.schemaId]["options"]
+        )
+        if (
+            self.answer.name not in options
+            and self.answer.schemaId not in options
+        ):
+            raise ValueError(
+                f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {self.answer.name or self.answer.schemaId}"
+            )
+
+
+# A union with custom construction logic to improve error messages
+class NDClassification(
+    SpecialUnion,
+    Type[Union[NDText, NDRadio, NDChecklist]],  # type: ignore
+): ...
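For illustration, here is a minimal sketch of how the NDClassification union above resolves a raw dict; it is not part of the patch and simply assumes the classes are importable from labelbox.schema.bulk_import_request, as the tests added later in this patch do. build() matches the dict's keys against each member's determinant field ("answer" for NDText/NDRadio, "answers" for NDChecklist), and since NDText and NDRadio share the same top-level keys, the tie is broken by the runtime type of the "answer" value.

# Minimal sketch, not part of the patch: how NDClassification.build()
# dispatches on input shape. Assumes the classes defined above are
# importable from labelbox.schema.bulk_import_request.
from labelbox.schema.bulk_import_request import (
    NDClassification,
    NDRadio,
    NDText,
)

common = {
    "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
    "dataRow": {"id": "ck1s02fqxm8fi0757f0e6qtdc"},
    "schemaId": "ckappz7d700gn0zbocmqkwd9i",
}

text = NDClassification(**common, answer="a value")  # str answer -> NDText
radio = NDClassification(**common, answer={"name": "option_a"})  # dict answer -> NDRadio

assert isinstance(text, NDText)
assert isinstance(radio, NDRadio)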
+
+
+###### Tools ######
+
+
+class NDBaseTool(NDBase):
+    classifications: List[NDClassification] = []
+
+    # This is independent of our problem
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        super(NDBaseTool, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        for classification in self.classifications:
+            classification.validate_feature_schemas(
+                valid_feature_schemas_by_name[self.name][
+                    "classificationsBySchemaId"
+                ]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId][
+                    "classificationsBySchemaId"
+                ],
+                valid_feature_schemas_by_name[self.name][
+                    "classificationsByName"
+                ]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId][
+                    "classificationsByName"
+                ],
+            )
+
+    @field_validator("classifications", mode="before")
+    def validate_subclasses(cls, value, field):
+        # Create uuid and datarow id so we don't have to define classification objects twice
+        # This is caused by the fact that we require these ids for top level classifications but not for subclasses
+        results = []
+        dummy_id = "child".center(25, "_")
+        for row in value:
+            results.append(
+                {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())}
+            )
+        return results
+
+
+class NDPolygon(NDBaseTool):
+    ontology_type: Literal["polygon"] = "polygon"
+    polygon: List[Point] = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("polygon")
+    def is_geom_valid(cls, v):
+        if len(v) < 3:
+            raise ValueError(
+                f"A polygon must have at least 3 points to be valid. Found {v}"
+            )
+        return v
+
+
+class NDPolyline(NDBaseTool):
+    ontology_type: Literal["line"] = "line"
+    line: List[Point] = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("line")
+    def is_geom_valid(cls, v):
+        if len(v) < 2:
+            raise ValueError(
+                f"A line must have at least 2 points to be valid. Found {v}"
+            )
+        return v
+
+
+class NDRectangle(NDBaseTool):
+    ontology_type: Literal["rectangle"] = "rectangle"
+    bbox: Bbox = Field(json_schema_extra={"determinant": True})
+    # Could check if points are positive
+
+
+class NDPoint(NDBaseTool):
+    ontology_type: Literal["point"] = "point"
+    point: Point = Field(json_schema_extra={"determinant": True})
+    # Could check if points are positive
+
+
+class EntityLocation(BaseModel):
+    start: int
+    end: int
+
+
+class NDTextEntity(NDBaseTool):
+    ontology_type: Literal["named-entity"] = "named-entity"
+    location: EntityLocation = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("location")
+    def is_valid_location(cls, v):
+        if isinstance(v, BaseModel):
+            v = v.model_dump()
+
+        if len(v) < 2:
+            raise ValueError(
+                f"A text location must have both a start and an end. Found {v}"
+            )
+        if v["start"] < 0:
+            raise ValueError(f"Text location must be positive. Found {v}")
+        if v["start"] > v["end"]:
+            raise ValueError(
+                f"Text start location must be less or equal than end. Found {v}"
+            )
+        return v
+
+
+class RLEMaskFeatures(BaseModel):
+    counts: List[int]
+    size: List[int]
+
+    @field_validator("counts")
+    def validate_counts(cls, counts):
+        if not all([count >= 0 for count in counts]):
+            raise ValueError(
+                "Found negative value for counts. They should all be zero or positive"
+            )
+        return counts
+
+    @field_validator("size")
+    def validate_size(cls, size):
+        if len(size) != 2:
+            raise ValueError(
+                f"Mask `size` should have two ints representing height and width. Found: {size}"
+            )
+        if not all([count > 0 for count in size]):
+            raise ValueError(
+                f"Mask `size` values should be positive ints. Found: {size}"
+            )
+        return size
+
+
+class PNGMaskFeatures(BaseModel):
+    # base64 encoded png bytes
+    png: str
+
+
+class URIMaskFeatures(BaseModel):
+    instanceURI: str
+    colorRGB: Union[List[int], Tuple[int, int, int]]
+
+    @field_validator("colorRGB")
+    def validate_color(cls, colorRGB):
+        # Does the dtype matter? Can it be a float?
+        if not isinstance(colorRGB, (tuple, list)):
+            raise ValueError(
+                f"Received color that is not a list or tuple. Found: {colorRGB}"
+            )
+        elif len(colorRGB) != 3:
+            raise ValueError(
+                f"Must provide RGB values for segmentation colors. Found: {colorRGB}"
+            )
+        elif not all([0 <= color <= 255 for color in colorRGB]):
+            raise ValueError(
+                f"All rgb colors must be between 0 and 255. Found: {colorRGB}"
+            )
+        return colorRGB
+
+
+class NDMask(NDBaseTool):
+    ontology_type: Literal["superpixel"] = "superpixel"
+    mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field(
+        json_schema_extra={"determinant": True}
+    )
+
+
+# A union with custom construction logic to improve error messages
+class NDTool(
+    SpecialUnion,
+    Type[  # type: ignore
+        Union[
+            NDMask,
+            NDTextEntity,
+            NDPoint,
+            NDRectangle,
+            NDPolyline,
+            NDPolygon,
+        ]
+    ],
+): ...
+
+
+class NDAnnotation(
+    SpecialUnion,
+    Type[Union[NDTool, NDClassification]],  # type: ignore
+):
+    @classmethod
+    def build(cls: Any, data) -> "NDBase":
+        if not isinstance(data, dict):
+            raise ValueError("value must be dict")
+        errors = []
+        for cl in cls.get_union_types():
+            try:
+                return cl(**data)
+            except KeyError as e:
+                errors.append(f"{cl.__name__}: {e}")
+
+        raise ValueError(
+            "Unable to construct any annotation.\n{}".format("\n".join(errors))
+        )
+
+    @classmethod
+    def schema(cls):
+        data = {"definitions": {}}
+        for type_ in cls.get_union_types():
+            schema_ = type_.schema()
+            data["definitions"].update(schema_.pop("definitions"))
+            data[type_.__name__] = schema_
+        return data
diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py
index dfc87c8a4..6f8aebc58 100644
--- a/libs/labelbox/src/labelbox/schema/enums.py
+++ b/libs/labelbox/src/labelbox/schema/enums.py
@@ -1,6 +1,31 @@
 from enum import Enum
 
 
+class BulkImportRequestState(Enum):
+    """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
+
+    If you are not using MEA, continue using BulkImportRequest.
+    AnnotationImports are in beta and will change soon.
+
+    .. list-table::
+       :widths: 15 150
+       :header-rows: 1
+
+       * - State
+         - Description
+       * - RUNNING
+         - Indicates that the import job is not done yet.
+       * - FAILED
+         - Indicates the import job failed. Check `BulkImportRequest.errors` for more information
+       * - FINISHED
+         - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information
+    """
+
+    RUNNING = "RUNNING"
+    FAILED = "FAILED"
+    FINISHED = "FINISHED"
+
+
 class AnnotationImportState(Enum):
     """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
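To give a sense of how this re-added state enum is consumed, here is a rough usage sketch against the upload_annotations entry point restored in project.py below. It is not part of the patch; the api key, project id, and predictions are placeholders.

# Rough usage sketch; api key, project id, and predictions are placeholders.
from labelbox import Client
from labelbox.schema.enums import BulkImportRequestState

client = Client(api_key="<api-key>")
project = client.get_project("<project-id>")
predictions = [...]  # ndjson-style prediction dicts, as documented above

job = project.upload_annotations(name="my-import", annotations=predictions)
job.wait_until_done(sleep_time_seconds=5)  # polls while state == RUNNING

if job.state == BulkImportRequestState.FINISHED:
    print(job.statuses)  # per-annotation SUCCESS/FAILURE rows
else:
    print(job.errors)  # populated when the job FAILED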
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f2de4db5e..f8876f7c4 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -5,29 +5,36 @@ import warnings from collections import namedtuple from datetime import datetime, timezone +from pathlib import Path from typing import ( TYPE_CHECKING, Any, Dict, + Iterable, List, Optional, Tuple, + TypeVar, Union, overload, ) +from urllib.parse import urlparse from labelbox.schema.labeling_service import ( LabelingService, LabelingServiceStatus, ) from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard +import requests +from labelbox import parser from labelbox import utils from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, + ResourceConflict, ResourceNotFoundError, ) from labelbox.orm import query @@ -39,6 +46,7 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, + validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -55,6 +63,7 @@ from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, + OntologyKind, UploadType, ) from labelbox.schema.project_overview import ( @@ -63,7 +72,7 @@ ) if TYPE_CHECKING: - pass + from labelbox import BulkImportRequest DataRowPriority = int @@ -570,7 +579,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - "This function has only been tested to work with the Editor front end. Found %s", + f"This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -805,7 +814,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - "Batch exceeds max size, break into smaller batches" + f"Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1079,7 +1088,8 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Batch was not created successfully: " + json.dumps(task.errors) + f"Batch was not created successfully: " + + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) @@ -1426,7 +1436,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Priority was not updated successfully: " + f"Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1478,6 +1488,33 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] + def bulk_import_requests(self) -> PaginatedCollection: + """Returns bulk import request objects which are used in model-assisted labeling. + These are returned with the oldest first, and most recent last. + """ + + id_param = "project_id" + query_str = """query ListAllImportRequestsPyApi($%s: ID!) 
{ + bulkImportRequests ( + where: { projectId: $%s } + skip: %%d + first: %%d + ) { + %s + } + }""" % ( + id_param, + id_param, + query.results_query_part(Entity.BulkImportRequest), + ) + return PaginatedCollection( + self.client, + query_str, + {id_param: str(self.uid)}, + ["bulkImportRequests"], + Entity.BulkImportRequest, + ) + def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1592,7 +1629,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Data rows were not moved successfully: " + f"Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1602,6 +1639,77 @@ def _wait_for_task(self, task_id: str) -> Task: return task + def upload_annotations( + self, + name: str, + annotations: Union[str, Path, Iterable[Dict]], + validate: bool = False, + ) -> "BulkImportRequest": # type: ignore + """Uploads annotations to a new Editor project. + + Args: + name (str): name of the BulkImportRequest job + annotations (str or Path or Iterable): + url that is publicly accessible by Labelbox containing an + ndjson file + OR local path to an ndjson file + OR iterable of annotation rows + validate (bool): + Whether or not to validate the payload before uploading. + Returns: + BulkImportRequest + """ + + if isinstance(annotations, str) or isinstance(annotations, Path): + + def _is_url_valid(url: Union[str, Path]) -> bool: + """Verifies that the given string is a valid url. + + Args: + url: string to be checked + Returns: + True if the given url is valid otherwise False + + """ + if isinstance(url, Path): + return False + parsed = urlparse(url) + return bool(parsed.scheme) and bool(parsed.netloc) + + if _is_url_valid(annotations): + return Entity.BulkImportRequest.create_from_url( + client=self.client, + project_id=self.uid, + name=name, + url=str(annotations), + validate=validate, + ) + else: + path = Path(annotations) + if not path.exists(): + raise FileNotFoundError( + f"{annotations} is not a valid url nor existing local file" + ) + return Entity.BulkImportRequest.create_from_local_file( + client=self.client, + project_id=self.uid, + name=name, + file=path, + validate_file=validate, + ) + elif isinstance(annotations, Iterable): + return Entity.BulkImportRequest.create_from_objects( + client=self.client, + project_id=self.uid, + name=name, + predictions=annotations, # type: ignore + validate=validate, + ) + else: + raise ValueError( + f"Invalid annotations given of type: {type(annotations)}" + ) + def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py new file mode 100644 index 000000000..9abae1422 --- /dev/null +++ b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py @@ -0,0 +1,258 @@ +from unittest.mock import patch +import uuid +from labelbox import parser, Project +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +import pytest +import random +from labelbox.data.annotation_types.annotation import ObjectAnnotation +from labelbox.data.annotation_types.classification.classification import ( + Checklist, + ClassificationAnnotation, + ClassificationAnswer, + Radio, +) +from labelbox.data.annotation_types.data.video import VideoData +from 
labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.rectangle import ( + Rectangle, + RectangleUnit, +) +from labelbox.data.annotation_types.label import Label +from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.ner import ( + DocumentEntity, + DocumentTextSelection, +) +from labelbox.data.annotation_types.video import VideoObjectAnnotation + +from labelbox.data.serialization import NDJsonConverter +from labelbox.exceptions import MALValidationError, UuidError +from labelbox.schema.bulk_import_request import BulkImportRequest +from labelbox.schema.enums import BulkImportRequestState +from labelbox.schema.annotation_import import LabelImport, MALPredictionImport +from labelbox.schema.media_type import MediaType + +""" +- Here we only want to check that the uploads are calling the validation +- Then with unit tests we can check the types of errors raised +""" +# TODO: remove library once bulk import requests are removed + + +@pytest.mark.order(1) +def test_create_from_url(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_file(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + with pytest.raises(MALValidationError): + module_project.upload_annotations( + name=name, annotations=url, validate=True + ) + # Schema ids shouldn't match + + +def test_create_from_objects( + module_project: Project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_create_from_label_objects( + module_project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + labels = list(NDJsonConverter.deserialize(predictions)) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=labels + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + normalized_predictions = list(NDJsonConverter.serialize(labels)) + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, normalized_predictions + ) + + +def test_create_from_local_file( + tmp_path, predictions, module_project, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + file_name = f"{name}.ndjson" + file_path = tmp_path / 
file_name + with file_path.open("w") as f: + parser.dump(predictions, f) + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=str(file_path), validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_get(client, module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + bulk_import_request = BulkImportRequest.from_name( + client, project_id=module_project.uid, name=name + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_ndjson(tmp_path, module_project): + file_name = f"broken.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + f.write("test") + + with pytest.raises(ValueError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + +def test_validate_ndjson_uuid(tmp_path, module_project, predictions): + file_name = f"repeat_uuid.ndjson" + file_path = tmp_path / file_name + repeat_uuid = predictions.copy() + uid = str(uuid.uuid4()) + repeat_uuid[0]["uuid"] = uid + repeat_uuid[1]["uuid"] = uid + + with file_path.open("w") as f: + parser.dump(repeat_uuid, f) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=repeat_uuid + ) + + +@pytest.mark.skip( + "Slow test and uses a deprecated api endpoint for annotation imports" +) +def test_wait_till_done(rectangle_inference, project): + name = str(uuid.uuid4()) + url = project.client.upload_data( + content=parser.dumps(rectangle_inference), sign=True + ) + bulk_import_request = project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert len(bulk_import_request.inputs) == 1 + bulk_import_request.wait_until_done() + assert bulk_import_request.state == BulkImportRequestState.FINISHED + + # Check that the status files are being returned as expected + assert len(bulk_import_request.errors) == 0 + assert len(bulk_import_request.inputs) == 1 + assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] + assert len(bulk_import_request.statuses) == 1 + assert bulk_import_request.statuses[0]["status"] == "SUCCESS" + assert ( + bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] + ) + + +def test_project_bulk_import_requests(module_project, predictions): + result = module_project.bulk_import_requests() + assert len(list(result)) == 0 + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + 
name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + result = module_project.bulk_import_requests() + assert len(list(result)) == 3 + + +def test_delete(module_project, predictions): + name = str(uuid.uuid4()) + + bulk_import_requests = module_project.bulk_import_requests() + [ + bulk_import_request.delete() + for bulk_import_request in bulk_import_requests + ] + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 1 + + bulk_import_request.delete() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index 9e8963a26..a0df559fc 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,8 +1,24 @@ +from labelbox.schema.media_type import MediaType +from labelbox.schema.project import Project import pytest -from pytest_cases import fixture_ref, parametrize + +from labelbox import parser +from pytest_cases import parametrize, fixture_ref from labelbox.exceptions import MALValidationError -from labelbox.schema.media_type import MediaType +from labelbox.schema.bulk_import_request import ( + NDChecklist, + NDClassification, + NDMask, + NDPolygon, + NDPolyline, + NDRadio, + NDRectangle, + NDText, + NDTextEntity, + NDTool, + _validate_ndjson, +) """ - These NDlabels are apart of bulkImportReqeust and should be removed once bulk import request is removed @@ -175,6 +191,39 @@ def test_missing_feature_schema(module_project, rectangle_inference): _validate_ndjson([pred], module_project) +def test_validate_ndjson(tmp_path, configured_project): + file_name = f"broken.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + f.write("test") + + with pytest.raises(ValueError): + configured_project.upload_annotations( + name="name", annotations=str(file_path), validate=True + ) + + +def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): + file_name = f"repeat_uuid.ndjson" + file_path = tmp_path / file_name + repeat_uuid = predictions.copy() + repeat_uuid[0]["uuid"] = "test_uuid" + repeat_uuid[1]["uuid"] = "test_uuid" + + with file_path.open("w") as f: + parser.dump(repeat_uuid, f) + + with pytest.raises(MALValidationError): + configured_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + with pytest.raises(MALValidationError): + configured_project.upload_annotations( + name="name", validate=True, annotations=repeat_uuid + ) + + @pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) def test_video_upload(video_checklist_inference, configured_project): pred = video_checklist_inference[0].copy() diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json new file mode 100644 index 000000000..4de15e217 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json @@ -0,0 +1,54 
@@ +[ + { + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "confidence": 0.8, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" + }, + { + "answer": [ + { + "schemaId": "ckrb1sfl8099e0y919v260awv", + "confidence": 0.82, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } + ], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json new file mode 100644 index 000000000..83a95e5bf --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json @@ -0,0 +1,25 @@ +[{ + "location": { + "start": 67, + "end": 128 + }, + "messageId": "some-message-id", + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] +}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 75fe36e44..91563b8ae 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,17 +8,16 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - }, - "classifications": [] + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -29,17 +28,20 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - }, - "classifications": [] + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -48,39 +50,762 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + 
"x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + 
}, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 }, { - "x": 15.0, - "y": 20.0 + "x": 1119, + "y": 934 }, { - "x": 20.0, - "y": 25.0 + "x": 1118, + "y": 935 }, { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json new file mode 100644 index 000000000..591e40cf6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json @@ -0,0 +1,823 @@ +[ + { + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + "schemaId": "ckrazcueb16og0z6609jj7y3y", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + { + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + 
"schemaId": "ckrazcuec16ok0z66f956apb7", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.834, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } + }, + { + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + "schemaId": "ckrazcuec16oi0z66dzrd8pfl", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.986, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "polygon": [ + { + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, 
+ "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 + } + ] + }, + { + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + "schemaId": "ckrazcuec16om0z66bhhh4tp7", + "dataRow": { + "globalKey": 
"05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "point": { + "x": 2122, + "y": 1457 + } + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 466a03594..82be4cdab 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,86 +1,826 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "ckrazcueb16og0z6609jj7y3y", + "name": "box a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": [], - "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "confidence": 0.854, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.7 } - ], - "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - } + ] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "ckrazcuec16ok0z66f956apb7", + "name": "mask a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": [], - "confidence": 0.834, + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + }, + "confidence": 0.685, "customMetrics": [ { "name": "customMetric1", - "value": 0.3 + "value": 0.4 + }, + { + "name": "customMetric2", + "value": 0.9 } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - } + ] }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "ckrazcuec16oi0z66dzrd8pfl", + "name": "polygon a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.986, + "confidence": 0.71, "customMetrics": [ { "name": "customMetric1", - "value": 0.9 + "value": 0.1 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 
929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 }, { - "x": 15.0, - "y": 20.0 + "x": 1099, + "y": 911 }, { - "x": 20.0, - "y": 25.0 + "x": 1100, + "y": 911 }, { - "x": 10.0, - "y": 20.0 + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, 
+ "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "ckrazcuec16om0z66bhhh4tp7", + "name": "point a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, + "confidence": 0.77, + "customMetrics": [ + { + "name": "customMetric2", + "value": 1.2 + } + ], "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json new file mode 100644 index 000000000..31be5a4c7 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json @@ -0,0 +1,10 @@ +[ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "aggregation": "ARITHMETIC_MEAN", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "metricValue": 0.1 + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json new file mode 100644 index 000000000..f4b4894f6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json @@ -0,0 +1,155 @@ +[{ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 4, + "unit": "POINTS", + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 162.73, + "left": 32.45, + "height": 388.16999999999996, + "width": 101.66000000000001 + } +}, { + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "bbox": { + "top": 223.26, + "left": 251.42, + "height": 457.03999999999996, + "width": 186.78 + } +}, { + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": 
[], + "page": 6, + "unit": "POINTS", + "confidence": 0.99, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 32.52, + "left": 218.17, + "height": 231.73, + "width": 110.56000000000003 + } +}, { + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "confidence": 0.89, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 117.39, + "left": 4.25, + "height": 456.9200000000001, + "width": 164.83 + } +}, { + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 8, + "unit": "POINTS", + "bbox": { + "top": 82.13, + "left": 217.28, + "height": 279.76, + "width": 82.43000000000004 + } +}, { + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 3, + "unit": "POINTS", + "bbox": { + "top": 298.12, + "left": 83.34, + "height": 203.83000000000004, + "width": 0.37999999999999545 + } +}, +{ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "named_entity", + "classifications": [], + "textSelections": [ + { + "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + "tokenIds": [ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c" + ], + "page": 1 + } + ] +} +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json new file mode 100644 index 000000000..d6a9eecbd --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json @@ -0,0 +1,36 @@ +[ + { + "line": [ + { + "x": 2534.353, + "y": 249.471 + }, + { + "x": 2429.492, + "y": 182.092 + }, + { + "x": 2294.322, + "y": 221.962 + } + ], + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-line", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.58, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json new file mode 100644 index 000000000..1f26d8dc8 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json @@ -0,0 +1,26 @@ +[ + { + "location": { + "start": 67, + "end": 128 + }, + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + 
"schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json new file mode 100644 index 000000000..11e0753d9 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json @@ -0,0 +1,166 @@ +[{ + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" + }, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{ + "start": 30, + "end": 35 + }, { + "start": 50, + "end": 51 + }] +}, { + "answer": [{ + "schemaId": "ckrb1sfl8099e0y919v260awv" + }], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{ + "start": 0, + "end": 5 + }] +}, { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" +}, { + "classifications": [], + "schemaId": + "cl5islwg200gfci6g0oitaypu", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [{ + "keyframes": [{ + "frame": 1, + "line": [{ + "x": 10.0, + "y": 10.0 + }, { + "x": 100.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }, { + "frame": 5, + "line": [{ + "x": 15.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 8, + "line": [{ + "x": 100.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [{ + "keyframes": [{ + "frame": 1, + "point": { + "x": 10.0, + "y": 10.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 5, + "point": { + "x": 50.0, + "y": 50.0 + }, + "classifications": [] + }, { + "frame": 10, + "point": { + "x": 10.0, + "y": 50.0 + }, + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5iw0roz00lwci6g5jni62vs", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [{ + "keyframes": [{ + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0 + }, + "classifications": [] + }, { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0 + }, + "classifications": [] + }] + }] +}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 59f568c75..0bc3c8924 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ 
b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -37,6 +37,13 @@ def test_serialization_min(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + for i, annotation in enumerate(res.annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_serialization_with_classification(): label = Label( @@ -127,6 +134,12 @@ def test_serialization_with_classification(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + assert res.model_dump(exclude_none=True) == label.model_dump( + exclude_none=True + ) + def test_serialization_with_classification_double_nested(): label = Label( @@ -220,6 +233,13 @@ def test_serialization_with_classification_double_nested(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.model_dump(exclude_none=True) == label.model_dump( + exclude_none=True + ) + def test_serialization_with_classification_double_nested_2(): label = Label( @@ -310,3 +330,9 @@ def test_serialization_with_classification_double_nested_2(): res = next(serialized) res.pop("uuid") assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + assert res.model_dump(exclude_none=True) == label.model_dump( + exclude_none=True + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py index 82adce99c..8dcb17f0b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py @@ -1,73 +1,15 @@ import json -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - Radio, - Text, -) -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ClassificationAnnotation, - ClassificationAnswer, -) -from labelbox.data.mixins import CustomMetric - def test_classification(): with open( "tests/data/assets/ndjson/classification_import.json", "r" ) as file: data = json.load(file) - - label = Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.8, - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ), - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.82, - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - ), - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"}, - value=Text(answer="a value"), - ), - ], - ) - - 
res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -76,48 +18,6 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - label = Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - name="classification a", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="choice 1", - ), - ), - ), - ClassificationAnnotation( - name="classification b", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.945, - name="choice 2", - ) - ], - ), - ), - ClassificationAnnotation( - name="classification c", - extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, - value=Text(answer="a value"), - ), - ], - ) - - res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 561f9ce86..f7da9181b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,12 +1,8 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -103,62 +99,25 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - -def test_conversation_entity_import(): - with open( - "tests/data/assets/ndjson/conversation_entity_import.json", "r" - ) as file: - data = json.load(file) - - label = lb_types.Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - lb_types.ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, - value=lb_types.ConversationEntity( - start=67, end=128, message_id="some-message-id" - ), - ) - ], - ) - - res = list(NDJsonConverter.serialize([label])) - assert res == data + deserialized_label = list(NDJsonConverter().deserialize(ndjson)) + deserialized_label[0].annotations[0].extra.pop("uuid") + assert deserialized_label[0].model_dump(exclude_none=True) == label[ + 0 + ].model_dump(exclude_none=True) -def test_conversation_entity_import_without_confidence(): - with open( +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/conversation_entity_import.json", "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_conversation_entity_import(filename: str): + with 
open(filename, "r") as file: data = json.load(file) - label = lb_types.Label( - uid=None, - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - lb_types.ObjectAnnotation( - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, - value=lb_types.ConversationEntity( - start=67, end=128, extra={}, message_id="some-message-id" - ), - ) - ], - ) - - res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py index 999e1bda5..333c00250 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py @@ -1,29 +1,67 @@ +from copy import copy +import pytest import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter +from labelbox.data.serialization.ndjson.objects import ( + NDDicomSegments, + NDDicomSegment, + NDDicomLine, +) + +""" +Data gen prompt test data +""" + +prompt_text_annotation = lb_types.PromptClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + name="test", + value=lb_types.PromptText( + answer="the answer to the text questions right here" + ), +) + +prompt_text_ndjson = { + "answer": "the answer to the text questions right here", + "name": "test", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, +} + +data_gen_label = lb_types.Label( + data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + annotations=[prompt_text_annotation], +) + +""" +Prompt annotation test +""" def test_serialize_label(): - prompt_text_annotation = lb_types.PromptClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - name="test", - extra={"uuid": "test"}, - value=lb_types.PromptText( - answer="the answer to the text questions right here" - ), - ) + serialized_label = next(NDJsonConverter().serialize([data_gen_label])) + # Remove uuid field since this is a random value that can not be specified also meant for relationships + del serialized_label["uuid"] + assert serialized_label == prompt_text_ndjson + - prompt_text_ndjson = { - "answer": "the answer to the text questions right here", - "name": "test", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "test", - } - - data_gen_label = lb_types.Label( - data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - annotations=[prompt_text_annotation], +def test_deserialize_label(): + deserialized_label = next( + NDJsonConverter().deserialize([prompt_text_ndjson]) ) - serialized_label = next(NDJsonConverter().serialize([data_gen_label])) + if hasattr(deserialized_label.annotations[0], "extra"): + # Extra fields are added to deserialized label by default need removed to match + deserialized_label.annotations[0].extra = {} + assert deserialized_label.model_dump( + exclude_none=True + ) == data_gen_label.model_dump(exclude_none=True) - assert serialized_label == prompt_text_ndjson + +def test_serialize_deserialize_label(): + serialized = list(NDJsonConverter.serialize([data_gen_label])) + deserialized = next(NDJsonConverter.deserialize(serialized)) + if hasattr(deserialized.annotations[0], "extra"): + # Extra fields are added to deserialized label by default need removed to match + 
deserialized.annotations[0].extra = {} + assert deserialized.model_dump( + exclude_none=True + ) == data_gen_label.model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 762891aa2..633214367 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,5 +1,6 @@ from copy import copy import pytest +import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -180,3 +181,28 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson + + +@pytest.mark.parametrize("label, ndjson", labels_ndjsons) +def test_deserialize_label(label, ndjson): + deserialized_label = next(NDJsonConverter().deserialize([ndjson])) + if hasattr(deserialized_label.annotations[0], "extra"): + deserialized_label.annotations[0].extra = {} + for i, annotation in enumerate(deserialized_label.annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value + + +@pytest.mark.parametrize("label", labels) +def test_serialize_deserialize_label(label): + serialized = list(NDJsonConverter.serialize([label])) + deserialized = list(NDJsonConverter.deserialize(serialized)) + if hasattr(deserialized[0].annotations[0], "extra"): + deserialized[0].annotations[0].extra = {} + for i, annotation in enumerate(deserialized[0].annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index a0897ad9f..5fe6a9789 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,19 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - RectangleUnit, - Point, - DocumentRectangle, - DocumentEntity, - DocumentTextSelection, -) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -66,144 +53,10 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - labels = [ - Label( - uid=None, - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": 
"20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.89, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - ) - ] - ), - ) - ], - ), - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_with_name_only(): @@ -212,135 +65,26 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id=None, - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, 
y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.74, - name="boxy", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - ) - ] - ), - ) - ], - ), - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson + + +def test_pdf_bbox_deserialize(): + deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) + deserialized[0].annotations[0].extra = {} + assert ( + deserialized[0].annotations[0].value + == bbox_labels[0].annotations[0].value + ) + assert ( + deserialized[0].annotations[0].name + == bbox_labels[0].annotations[0].name + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 1ab678cde..4adcd9935 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,6 +9,8 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=VideoData( uid="cklr9mr4m5iao0rb6cvxu4qbn", + file_path=None, + frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -20,7 +22,6 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", - "uuid": "b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, 
value=Rectangle( extra={}, @@ -587,4 +588,31 @@ def test_serialize_video_objects(): serialized_labels = NDJsonConverter.serialize([label]) label = next(serialized_labels) - assert label == video_serialized_bbox_label() + manual_label = video_serialized_bbox_label() + + for key in label.keys(): + # Ignore uuid because a random one is generated when none was provided + if key != "uuid": + assert label[key] == manual_label[key] + + assert len(label["segments"]) == 2 + assert len(label["segments"][0]["keyframes"]) == 2 + assert len(label["segments"][1]["keyframes"]) == 4 + + # Converting back restores only the keyframes; the annotation count should equal the sum of keyframes across all segments + deserialized_labels = NDJsonConverter.deserialize([label]) + label = next(deserialized_labels) + assert len(label.annotations) == 6 + + +def test_confidence_is_ignored(): + label = video_bbox_label() + serialized_labels = NDJsonConverter.serialize([label]) + label = next(serialized_labels) + label["confidence"] = 0.453 + label["segments"][0]["confidence"] = 0.453 + + deserialized_labels = NDJsonConverter.deserialize([label]) + label = next(deserialized_labels) + for annotation in label.annotations: + assert annotation.confidence is None diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 349be13a8..84c017497 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -34,6 +34,16 @@ def test_serialization(): assert res["answer"] == "text_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + + annotation = res.annotations[0] + + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "text_answer" + assert annotation_value.confidence == 0.5 + def test_nested_serialization(): label = Label( @@ -92,3 +102,19 @@ def test_nested_serialization(): assert sub_classification["name"] == "nested answer" assert sub_classification["answer"] == "nested answer" assert sub_classification["confidence"] == 0.7 + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + answer = annotation.value.answer[0] + assert answer.confidence == 0.9 + assert answer.name == "first_answer" + + classification_answer = answer.classifications[0].value.answer + assert classification_answer.confidence == 0.8 + assert classification_answer.name == "first_sub_radio_answer" + + sub_classification_answer = classification_answer.classifications[0].value + assert type(sub_classification_answer) is Text + assert sub_classification_answer.answer == "nested answer" + assert sub_classification_answer.confidence == 0.7 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py index d104a691e..2b3fa7f8c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py @@ -1,74 +1,73 @@ -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +import json +import pytest + +from labelbox.data.serialization.ndjson.classification import NDRadio + from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ClassificationAnnotation, - Radio, - ClassificationAnswer, -) 
+from labelbox.data.serialization.ndjson.objects import NDLine -def test_generic_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def round_dict(data): + if isinstance(data, dict): + for key in data: + if isinstance(data[key], float): + data[key] = int(data[key]) + elif isinstance(data[key], dict): + data[key] = round_dict(data[key]) + elif isinstance(data[key], (list, tuple)): + data[key] = [round_dict(r) for r in data[key]] - label = Label( - data=GenericDataRowData( - global_key="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) + return data + + +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/classification_import_global_key.json", + "tests/data/assets/ndjson/metric_import_global_key.json", + "tests/data/assets/ndjson/polyline_import_global_key.json", + "tests/data/assets/ndjson/text_entity_import_global_key.json", + "tests/data/assets/ndjson/conversation_entity_import_global_key.json", + ], +) +def test_many_types(filename: str): + with open(filename, "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == data + f.close() - res = list(NDJsonConverter.serialize([label])) - assert res == expected +def test_image(): + with open( + "tests/data/assets/ndjson/image_import_global_key.json", "r" + ) as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() -def test_dict_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def test_pdf(): + with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() - label = Label( - data={ - "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct", - }, - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) - res = list(NDJsonConverter.serialize([label])) +def test_video(): + with open( + "tests/data/assets/ndjson/video_import_global_key.json", "r" + ) as f: + data = json.load(f) - assert res == expected + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] + f.close() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index d67acb9c3..1729e1f46 100644 --- 
a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,8 +1,4 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -14,7 +10,6 @@ ImageData, MaskData, ) -from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -34,74 +29,12 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.9) - ], - confidence=0.986, - feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_image_with_name_only(): @@ -110,74 +43,11 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - name="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - name="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.9) - ], - 
confidence=0.986, - name="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - name="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask(): @@ -187,11 +57,10 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], - "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -199,54 +68,16 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": (255, 0, 0), + "colorRGB": [255, 0, 0], }, - "classifications": [], }, ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) - mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) - mask_numpy = mask_numpy.astype(np.uint8) - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.8, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Mask( - mask=MaskData(arr=mask_numpy), - color=(1, 1, 1), - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - extra={}, - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=(255, 0, 0), - ), - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask_from_arr(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 40e098405..45c5c67bf 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,166 +1,38 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.annotation_types.metrics.confusion_matrix import ( - ConfusionMatrixMetric, -) from 
labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ScalarMetric, - ScalarMetricAggregation, - ConfusionMatrixAggregation, -) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ScalarMetric( - value=0.1, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - assert res == data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert reserialized == data def test_custom_scalar_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: 0.1, 0.2: 0.5}, - "metricName": "custom_iou", - "aggregation": "SUM", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ScalarMetric( - value=0.1, - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value=0.1, - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value={"0.1": 0.1, "0.2": 0.5}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_scalar_import.json", "r" + ) as file: + data = json.load(file) - assert res == data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) def test_custom_confusion_matrix_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (1, 1, 2, 3), - "metricName": "50%_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (0, 1, 2, 5), - "metricName": "50%_iou", - "featureName": "sample_class", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - "metricName": "50%_iou", - "aggregation": "CONFUSION_MATRIX", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ConfusionMatrixMetric( - 
value=(1, 1, 2, 3), - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value=(0, 1, 2, 5), - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" + ) as file: + data = json.load(file) - assert data == res + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 202f793fe..69594ff73 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,125 +1,32 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest from labelbox.data.serialization import NDJsonConverter -from labelbox.types import ( - Label, - MessageEvaluationTaskAnnotation, - MessageSingleSelectionTask, - MessageMultiSelectionTask, - MessageInfo, - OrderedMessageInfo, - MessageRankingTask, -) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cnjencjencjfencvj", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="single-selection", - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, - value=MessageSingleSelectionTask( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - parent_message_id="clxfznjb800073b6v43ppx9ca", - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cfcerfvergerfefj", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="multi-selection", - extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, - value=MessageMultiSelectionTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - selected_messages=[ - MessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - ) - ], - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="ranking", - extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=2, - ), - ], - ), - ) - ], - ), - ] + deserialized = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(deserialized)) - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert data == reserialized def 
test_mesage_ranking_task_wrong_order_serialization(): + with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: + data = json.load(file) + + some_ranking_task = next( + task + for task in data + if task["messageEvaluationTask"]["format"] == "message-ranking" + ) + some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ + "order" + ] = 3 + with pytest.raises(ValueError): - ( - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="ranking", - extra={ - "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" - }, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - ], - ), - ) - ], - ), - ) + list(NDJsonConverter.deserialize([some_ranking_task])) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py new file mode 100644 index 000000000..790bd87b3 --- /dev/null +++ b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py @@ -0,0 +1,19 @@ +import json +from labelbox.data.serialization.ndjson.label import NDLabel +from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle +import pytest + + +def test_bad_annotation_input(): + data = [{"test": 3}] + with pytest.raises(ValueError): + NDLabel(**{"annotations": data}) + + +def test_correct_annotation_input(): + with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: + data = json.load(f) + assert isinstance( + NDLabel(**{"annotations": [data[0]]}).annotations[0], + NDDocumentRectangle, + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index 3633c9cbe..e0f0df0e6 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,135 +1,13 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Rectangle, - Point, - ClassificationAnnotation, - Radio, - ClassificationAnswer, - Text, - Checklist, -) def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.34, - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": 
"d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "5d03213e-4408-456c-9eca-cf0723202961", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.894, - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={}, - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -138,112 +16,6 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="box a", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.811, - name="first answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box b", - extra={ - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification b", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.815, - name="second answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification c", - value=Checklist( - answer=[ - ClassificationAnswer( - name="third answer", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="a string", - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + 
res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index cd11d97fe..97d48a14e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,76 +1,18 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ObjectAnnotation, Point, Line, Label - - -def test_polyline_import_with_confidence(): - with open( - "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" - ) as file: - data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-line", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_polyline_import_without_confidence(): - with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/polyline_without_confidence_import.json", + "tests/data/assets/ndjson/polyline_import.json", + ], +) +def test_polyline_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.58, - name="some-line", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 4458e335c..bd80f9267 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,3 +1,4 @@ +import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -39,6 +40,14 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + + for i, annotation in enumerate(res.annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_serialization_with_radio_classification(): label = Label( @@ -92,3 +101,10 @@ def test_serialization_with_radio_classification(): res = next(serialized) 
res.pop("uuid") assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.annotations[0].model_dump( + exclude_none=True + ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 0e42ab152..66630dbb5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,10 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -12,26 +8,8 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="bbox", - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - }, - value=Rectangle( - start=Point(x=38.0, y=28.0), - end=Point(x=81.0, y=69.0), - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -61,6 +39,8 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, }, ) @@ -68,9 +48,8 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - data = list(NDJsonConverter.serialize([label])) - - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def test_rectangle_mixed_start_end_points(): @@ -97,13 +76,17 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, + }, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - data = list(NDJsonConverter.serialize([label])) - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index 235b66957..f33719035 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,135 +1,16 @@ import json +from uuid import uuid4 -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Point, - Rectangle, - RelationshipAnnotation, - Relationship, -) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = [ - Label( - data=GenericDataRowData( - uid="clf98gj90000qp38ka34yhptl", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": 
"d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - extra={}, - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="clf98gj90000qp38ka34yhptl-DIFFERENT", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - ] + res = list(NDJsonConverter.deserialize(data)) res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -163,3 +44,29 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] + + +def test_relationship_nonexistent_object(): + with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: + data = json.load(file) + + relationship_annotation = data[2] + source_uuid = relationship_annotation["relationship"]["source"] + target_uuid = str(uuid4()) + relationship_annotation["relationship"]["target"] = target_uuid + error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" + + with pytest.raises(ValueError, match=error_msg): + list(NDJsonConverter.deserialize(data)) + + +def test_relationship_duplicate_uuids(): + with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: + data = json.load(file) + + source, target = data[0], data[1] + target["uuid"] = source["uuid"] + error_msg = f"UUID '{source['uuid']}' is not unique" + + with pytest.raises(AssertionError, match=error_msg): + list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 21db389cb..d5e81c51a 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,5 +1,7 @@ from 
labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( + ClassificationAnswer, + Radio, Text, ) from labelbox.data.annotation_types.data.text import TextData @@ -32,3 +34,11 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index fb93f15d4..3e856f001 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,68 +1,21 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, TextEntity - - -def test_text_entity_import(): - with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: - data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_text_entity_import_without_confidence(): - with open( +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/text_entity_import.json", "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_text_entity_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 4fba5c2ca..c7a6535c4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,10 +1,10 @@ import json +from labelbox.client import Client from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, Radio, - Text, ) from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.geometry.line import Line @@ -13,10 +13,8 @@ 
from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.video import ( - VideoClassificationAnnotation, - VideoObjectAnnotation, -) +from labelbox.data.annotation_types.video import VideoObjectAnnotation +from labelbox import parser from labelbox.data.serialization.ndjson.converter import NDJsonConverter from operator import itemgetter @@ -26,275 +24,15 @@ def test_video(): with open("tests/data/assets/ndjson/video_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), - annotations=[ - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=30, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=31, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=32, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=33, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=34, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=35, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=50, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=51, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=0, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=1, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - 
), - frame=2, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=3, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=4, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=5, - ), - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c"}, - value=Text(answer="a value"), - ), - VideoObjectAnnotation( - feature_schema_id="cl5islwg200gfci6g0oitaypu", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=10.0, y=10.0), - Point(x=100.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5islwg200gfci6g0oitaypu", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=15.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5islwg200gfci6g0oitaypu", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=100.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=8, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=10.0), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=50.0, y=50.0), - frame=5, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=50.0), - frame=10, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - feature_schema_id="cl5iw0roz00lwci6g5jni62vs", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=10.0), - end=Point(x=155.0, y=110.0), - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5iw0roz00lwci6g5jni62vs", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=30.0), - end=Point(x=155.0, y=80.0), - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5iw0roz00lwci6g5jni62vs", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=200.0, y=300.0), - end=Point(x=350.0, y=700.0), - ), - frame=10, - keyframe=True, - segment_index=1, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) data = sorted(data, key=itemgetter("uuid")) 
res = sorted(res, key=itemgetter("uuid")) - assert data == res + + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_name_only(): @@ -302,274 +40,16 @@ def test_video_name_only(): "tests/data/assets/ndjson/video_import_name_only.json", "r" ) as file: data = json.load(file) - labels = [ - Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), - annotations=[ - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=30, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=31, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=32, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=33, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=34, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=35, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=50, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=51, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=0, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=1, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=2, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=3, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=4, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=5, - ), - ClassificationAnnotation( - name="question 3", - extra={"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3"}, - value=Text(answer="a value"), - ), - VideoObjectAnnotation( - name="segment 1", - extra={"uuid": 
"6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=10.0, y=10.0), - Point(x=100.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 1", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=15.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 1", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=100.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=8, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - name="segment 2", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=10.0), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 2", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=50.0, y=50.0), - frame=5, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - name="segment 2", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=50.0), - frame=10, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - name="segment 3", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=10.0), - end=Point(x=155.0, y=110.0), - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 3", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=30.0), - end=Point(x=155.0, y=80.0), - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 3", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=200.0, y=300.0), - end=Point(x=350.0, y=700.0), - ), - frame=10, - keyframe=True, - segment_index=1, - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - assert data == res + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_classification_global_subclassifications(): @@ -587,6 +67,7 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( + name="checklist", answer=[ ClassificationAnswer( name="first_checklist_answer", @@ -613,7 +94,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = { + expected_second_annotation = nested_checklist_annotation_ndjson = { "name": "nested_checklist_question", "answer": [ { @@ -635,6 +116,12 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + assert annotation.name == label.annotations[i].name + def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -800,6 +287,14 @@ def test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected + deserialized = 
NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_video_classification_point(): bbox_annotation = [ @@ -950,6 +445,13 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + def test_video_classification_frameline(): bbox_annotation = [ @@ -1117,289 +619,9 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - -[ - { - "answer": "a value", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 5, "start": 0}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5islwg200gfci6g0oitaypu", - "segments": [ - { - "keyframes": [ - { - "classifications": [], - "frame": 1, - "line": [ - {"x": 10.0, "y": 10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - { - "classifications": [], - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - } - ] - }, - ], - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "segments": [ - { - "keyframes": [ - { - "classifications": [], - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - } - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - }, - { - "classifications": [], - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - }, - ] - }, - ], - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "segments": [ - { - "keyframes": [ - { - "bbox": { - "height": 100.0, - "left": 5.0, - "top": 10.0, - "width": 150.0, - }, - "classifications": [], - "frame": 1, - }, - { - "bbox": { - "height": 50.0, - "left": 5.0, - "top": 30.0, - "width": 150.0, - }, - "classifications": [], - "frame": 5, - }, - ] - }, - { - "keyframes": [ - { - "bbox": { - "height": 400.0, - "left": 200.0, - "top": 300.0, - "width": 150.0, - }, - "classifications": [], - "frame": 10, - } - ] - }, - ], - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - }, -] - -[ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - 
"dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], - }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{"start": 0, "end": 5}], - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "classifications": [], - "schemaId": "cl5islwg200gfci6g0oitaypu", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "line": [ - {"x": 10.0, "y": 10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - { - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - } - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - "classifications": [], - } - ] - }, - { - "keyframes": [ - { - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - "classifications": [], - }, - { - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - "classifications": [], - }, - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0, - }, - "classifications": [], - }, - { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0, - }, - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0, - }, - "classifications": [], - } - ] - }, - ], - }, -] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value From c0892155c54d9c860fdc1cde9ecac36a7282b45c Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:58:36 -0500 Subject: [PATCH 06/35] Removed data types besides generic data row data --- .../data/annotation_types/__init__.py | 14 +- .../data/annotation_types/data/__init__.py | 12 +- .../data/annotation_types/data/audio.py | 7 - .../annotation_types/data/conversation.py | 7 - .../data/annotation_types/data/dicom.py | 7 - .../data/annotation_types/data/document.py | 7 - .../data/annotation_types/data/html.py | 7 - .../data/llm_prompt_creation.py | 7 - .../data/llm_prompt_response_creation.py | 9 - .../data/llm_response_creation.py | 7 - .../data/annotation_types/data/raster.py | 5 +- .../data/annotation_types/data/text.py | 115 ------- .../data/annotation_types/data/tiled_image.py | 294 
------------------ .../data/annotation_types/data/video.py | 173 ----------- .../labelbox/data/annotation_types/label.py | 42 +-- .../serialization/ndjson/classification.py | 12 +- .../data/serialization/ndjson/label.py | 40 +-- .../data/serialization/ndjson/metric.py | 8 +- .../labelbox/data/serialization/ndjson/mmc.py | 3 +- .../data/serialization/ndjson/objects.py | 29 +- .../data/serialization/ndjson/relationship.py | 4 +- libs/labelbox/src/labelbox/utils.py | 4 +- .../data/annotation_types/test_collection.py | 16 +- .../serialization/ndjson/test_checklist.py | 14 +- .../data/serialization/ndjson/test_image.py | 3 +- .../data/serialization/ndjson/test_radio.py | 8 +- .../data/serialization/ndjson/test_text.py | 5 +- 27 files changed, 57 insertions(+), 802 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/audio.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/document.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/html.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/text.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/video.py diff --git a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py index 7908bc242..84d6d65a5 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py @@ -32,18 +32,8 @@ from .classification import Radio from .classification import Text -from .data import AudioData -from .data import ConversationData -from .data import DicomData -from .data import DocumentData -from .data import HTMLData -from .data import ImageData +from .data import GenericDataRowData from .data import MaskData -from .data import TextData -from .data import VideoData -from .data import LlmPromptResponseCreationData -from .data import LlmPromptCreationData -from .data import LlmResponseCreationData from .label import Label from .collection import LabelGenerator @@ -58,8 +48,6 @@ from .data.tiled_image import EPSG from .data.tiled_image import EPSGTransformer from .data.tiled_image import TiledBounds -from .data.tiled_image import TiledImageData -from .data.tiled_image import TileLayer from .llm_prompt_response.prompt import PromptText from .llm_prompt_response.prompt import PromptClassificationAnnotation diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py index 2522b2741..8d5e7289b 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py @@ -1,12 +1,2 @@ -from .audio import AudioData -from .conversation import ConversationData -from .dicom import DicomData -from .document import DocumentData -from .html import HTMLData -from .raster import ImageData from .raster import MaskData -from .text import TextData -from .video import VideoData -from 
.llm_prompt_response_creation import LlmPromptResponseCreationData -from .llm_prompt_creation import LlmPromptCreationData -from .llm_response_creation import LlmResponseCreationData +from .generic_data_row_data import GenericDataRowData diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py b/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py deleted file mode 100644 index 916fca99d..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class AudioData(BaseData, _NoCoercionMixin): - class_name: Literal["AudioData"] = "AudioData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py deleted file mode 100644 index ef6507dca..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class ConversationData(BaseData, _NoCoercionMixin): - class_name: Literal["ConversationData"] = "ConversationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py b/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py deleted file mode 100644 index ae4c377dc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DicomData(BaseData, _NoCoercionMixin): - class_name: Literal["DicomData"] = "DicomData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py b/libs/labelbox/src/labelbox/data/annotation_types/data/document.py deleted file mode 100644 index 810a3ed3e..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DocumentData(BaseData, _NoCoercionMixin): - class_name: Literal["DocumentData"] = "DocumentData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py b/libs/labelbox/src/labelbox/data/annotation_types/data/html.py deleted file mode 100644 index 7a78fcb7b..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class HTMLData(BaseData, _NoCoercionMixin): - class_name: Literal["HTMLData"] = "HTMLData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py deleted file mode 100644 index a1b0450bc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptCreationData"] = "LlmPromptCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py 
b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py deleted file mode 100644 index a8dfce894..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py +++ /dev/null @@ -1,9 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptResponseCreationData"] = ( - "LlmPromptResponseCreationData" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py deleted file mode 100644 index a8963ed3f..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmResponseCreationData"] = "LlmResponseCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py index ba4c6485f..0dd23e388 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py @@ -11,7 +11,7 @@ from pydantic import BaseModel, model_validator, ConfigDict from labelbox.exceptions import InternalServerError -from .base_data import BaseData + from ..types import TypedArray @@ -220,6 +220,3 @@ class MaskData(RasterData): url: Optional[str] = None arr: Optional[TypedArray[Literal['uint8']]] = None """ - - -class ImageData(RasterData, BaseData): ... diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py b/libs/labelbox/src/labelbox/data/annotation_types/data/text.py deleted file mode 100644 index fe4c222d3..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py +++ /dev/null @@ -1,115 +0,0 @@ -from typing import Callable, Optional - -import requests -from requests.exceptions import ConnectTimeout -from google.api_core import retry - -from pydantic import ConfigDict, model_validator -from labelbox.exceptions import InternalServerError -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class TextData(BaseData, _NoCoercionMixin): - """ - Represents text data. Requires arg file_path, text, or url - - >>> TextData(text="") - - Args: - file_path (str) - text (str) - url (str) - """ - - class_name: Literal["TextData"] = "TextData" - file_path: Optional[str] = None - text: Optional[str] = None - url: Optional[str] = None - model_config = ConfigDict(extra="forbid") - - @property - def value(self) -> str: - """ - Property that unifies the data access pattern for all references to the text. 
- - Returns: - string representation of the text - """ - if self.text: - return self.text - elif self.file_path: - with open(self.file_path, "r") as file: - text = file.read() - self.text = text - return text - elif self.url: - text = self.fetch_remote() - self.text = text - return text - else: - raise ValueError("Must set either url, file_path or im_bytes") - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry( - deadline=15.0, - predicate=retry.if_exception_type(ConnectTimeout, InternalServerError), - ) - def fetch_remote(self) -> str: - """ - Method for accessing url. - - If url is not publicly accessible or requires another access pattern - simply override this function - """ - response = requests.get(self.url) - if response.status_code in [500, 502, 503, 504]: - raise InternalServerError(response.text) - response.raise_for_status() - return response.text - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other text references. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - url for the text - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.text is not None: - self.url = signer(self.text.encode()) - else: - raise ValueError( - "One of url, im_bytes, file_path, numpy must not be None." - ) - return self.url - - @model_validator(mode="after") - def validate_date(self, values): - file_path = self.file_path - text = self.text - url = self.url - uid = self.uid - global_key = self.global_key - if uid == file_path == text == url == global_key == None: - raise ValueError( - "One of `file_path`, `text`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"TextData(file_path={self.file_path}," - f"text={self.text[:30] + '...' if self.text is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py index adb8db549..cdb7f4127 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py @@ -88,300 +88,6 @@ def validate_bounds_lat_lng(self): return self -class TileLayer(BaseModel): - """Url that contains the tile layer. 
Must be in the format: - - https://c.tile.openstreetmap.org/{z}/{x}/{y}.png - - >>> layer = TileLayer( - url="https://c.tile.openstreetmap.org/{z}/{x}/{y}.png", - name="slippy map tile" - ) - """ - - url: str - name: Optional[str] = "default" - - def asdict(self) -> Dict[str, str]: - return {"tileLayerUrl": self.url, "name": self.name} - - @field_validator("url") - def validate_url(cls, url): - xyz_format = "/{z}/{x}/{y}" - if xyz_format not in url: - raise ValueError(f"{url} needs to contain {xyz_format}") - return url - - -class TiledImageData(BaseData): - """Represents tiled imagery - - If specified version is 2, converts bounds from [lng,lat] to [lat,lng] - - Requires the following args: - tile_layer: TileLayer - tile_bounds: TiledBounds - zoom_levels: List[int] - Optional args: - max_native_zoom: int = None - tile_size: Optional[int] - version: int = 2 - alternative_layers: List[TileLayer] - - >>> tiled_image_data = TiledImageData(tile_layer=TileLayer, - tile_bounds=TiledBounds, - zoom_levels=[1, 12]) - """ - - tile_layer: TileLayer - tile_bounds: TiledBounds - alternative_layers: List[TileLayer] = [] - zoom_levels: Tuple[int, int] - max_native_zoom: Optional[int] = None - tile_size: Optional[int] = DEFAULT_TMS_TILE_SIZE - version: Optional[int] = 2 - multithread: bool = True - - def __post_init__(self) -> None: - if self.max_native_zoom is None: - self.max_native_zoom = self.zoom_levels[0] - - def asdict(self) -> Dict[str, str]: - return { - "tileLayerUrl": self.tile_layer.url, - "bounds": [ - [self.tile_bounds.bounds[0].x, self.tile_bounds.bounds[0].y], - [self.tile_bounds.bounds[1].x, self.tile_bounds.bounds[1].y], - ], - "minZoom": self.zoom_levels[0], - "maxZoom": self.zoom_levels[1], - "maxNativeZoom": self.max_native_zoom, - "epsg": self.tile_bounds.epsg.name, - "tileSize": self.tile_size, - "alternativeLayers": [ - layer.asdict() for layer in self.alternative_layers - ], - "version": self.version, - } - - def raster_data( - self, zoom: int = 0, max_tiles: int = 32, multithread=True - ) -> RasterData: - """Converts the tiled image asset into a RasterData object containing an - np.ndarray. - - Uses the minimum zoom provided to render the image. 
- """ - if self.tile_bounds.epsg == EPSG.SIMPLEPIXEL: - xstart, ystart, xend, yend = self._get_simple_image_params(zoom) - elif self.tile_bounds.epsg == EPSG.EPSG4326: - xstart, ystart, xend, yend = self._get_3857_image_params( - zoom, self.tile_bounds - ) - elif self.tile_bounds.epsg == EPSG.EPSG3857: - # transform to 4326 - transformer = EPSGTransformer.create_geo_to_geo_transformer( - EPSG.EPSG3857, EPSG.EPSG4326 - ) - transforming_bounds = [ - transformer(self.tile_bounds.bounds[0]), - transformer(self.tile_bounds.bounds[1]), - ] - xstart, ystart, xend, yend = self._get_3857_image_params( - zoom, transforming_bounds - ) - else: - raise ValueError(f"Unsupported epsg found: {self.tile_bounds.epsg}") - - self._validate_num_tiles(xstart, ystart, xend, yend, max_tiles) - - rounded_tiles, pixel_offsets = list( - zip( - *[ - self._tile_to_pixel(pt) - for pt in [xstart, ystart, xend, yend] - ] - ) - ) - - image = self._fetch_image_for_bounds(*rounded_tiles, zoom, multithread) - arr = self._crop_to_bounds(image, *pixel_offsets) - return RasterData(arr=arr) - - @property - def value(self) -> np.ndarray: - """Returns the value of a generated RasterData object.""" - return self.raster_data( - self.zoom_levels[0], multithread=self.multithread - ).value - - def _get_simple_image_params( - self, zoom - ) -> Tuple[float, float, float, float]: - """Computes the x and y tile bounds for fetching an image that - captures the entire labeling region (TiledData.bounds) given a specific zoom - - Simple has different order of x / y than lat / lng because of how leaflet behaves - leaflet reports all points as pixel locations at a zoom of 0 - """ - xend, xstart, yend, ystart = ( - self.tile_bounds.bounds[1].x, - self.tile_bounds.bounds[0].x, - self.tile_bounds.bounds[1].y, - self.tile_bounds.bounds[0].y, - ) - return ( - *[ - x * (2 ** (zoom)) / self.tile_size - for x in [xstart, ystart, xend, yend] - ], - ) - - def _get_3857_image_params( - self, zoom: int, bounds: TiledBounds - ) -> Tuple[float, float, float, float]: - """Computes the x and y tile bounds for fetching an image that - captures the entire labeling region (TiledData.bounds) given a specific zoom - """ - lat_start, lat_end = bounds.bounds[1].y, bounds.bounds[0].y - lng_start, lng_end = bounds.bounds[1].x, bounds.bounds[0].x - - # Convert to zoom 0 tile coordinates - xstart, ystart = self._latlng_to_tile(lat_start, lng_start) - xend, yend = self._latlng_to_tile(lat_end, lng_end) - - # Make sure that the tiles are increasing in order - xstart, xend = min(xstart, xend), max(xstart, xend) - ystart, yend = min(ystart, yend), max(ystart, yend) - return (*[pt * 2.0**zoom for pt in [xstart, ystart, xend, yend]],) - - def _latlng_to_tile( - self, lat: float, lng: float, zoom=0 - ) -> Tuple[float, float]: - """Converts lat/lng to 3857 tile coordinates - Formula found here: - https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#lon.2Flat_to_tile_numbers_2 - """ - scale = 2**zoom - lat_rad = math.radians(lat) - x = (lng + 180.0) / 360.0 * scale - y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale - return x, y - - def _tile_to_pixel(self, tile: float) -> Tuple[int, int]: - """Rounds a tile coordinate and reports the remainder in pixels""" - rounded_tile = int(tile) - remainder = tile - rounded_tile - pixel_offset = int(self.tile_size * remainder) - return rounded_tile, pixel_offset - - def _fetch_image_for_bounds( - self, - x_tile_start: int, - y_tile_start: int, - x_tile_end: int, - y_tile_end: int, - zoom: int, - multithread=True, - ) -> 
np.ndarray: - """Fetches the tiles and combines them into a single image. - - If a tile cannot be fetched, a padding of expected tile size is instead added. - """ - - if multithread: - tiles = {} - with ThreadPoolExecutor( - max_workers=TILE_DOWNLOAD_CONCURRENCY - ) as exc: - for x in range(x_tile_start, x_tile_end + 1): - for y in range(y_tile_start, y_tile_end + 1): - tiles[(x, y)] = exc.submit(self._fetch_tile, x, y, zoom) - - rows = [] - for y in range(y_tile_start, y_tile_end + 1): - row = [] - for x in range(x_tile_start, x_tile_end + 1): - try: - if multithread: - row.append(tiles[(x, y)].result()) - else: - row.append(self._fetch_tile(x, y, zoom)) - except: - row.append( - np.zeros( - shape=(self.tile_size, self.tile_size, 3), - dtype=np.uint8, - ) - ) - rows.append(np.hstack(row)) - - return np.vstack(rows) - - @retry.Retry(initial=1, maximum=16, multiplier=2) - def _fetch_tile(self, x: int, y: int, z: int) -> np.ndarray: - """ - Fetches the image and returns an np array. - """ - data = requests.get(self.tile_layer.url.format(x=x, y=y, z=z)) - data.raise_for_status() - decoded = np.array(Image.open(BytesIO(data.content)))[..., :3] - if decoded.shape[:2] != (self.tile_size, self.tile_size): - logger.warning(f"Unexpected tile size {decoded.shape}.") - return decoded - - def _crop_to_bounds( - self, - image: np.ndarray, - x_px_start: int, - y_px_start: int, - x_px_end: int, - y_px_end: int, - ) -> np.ndarray: - """This function slices off the excess pixels that are outside of the bounds. - This occurs because only full tiles can be downloaded at a time. - """ - - def invert_point(pt): - # Must have at least 1 pixel for stability. - pt = max(pt, 1) - # All pixel points are relative to a single tile - # So subtracting the tile size inverts the axis - pt = pt - self.tile_size - return pt if pt != 0 else None - - x_px_end, y_px_end = invert_point(x_px_end), invert_point(y_px_end) - return image[y_px_start:y_px_end, x_px_start:x_px_end, :] - - def _validate_num_tiles( - self, - xstart: float, - ystart: float, - xend: float, - yend: float, - max_tiles: int, - ): - """Calculates the number of expected tiles we would fetch. - - If this is greater than the number of max tiles, raise an error. - """ - total_n_tiles = (yend - ystart + 1) * (xend - xstart + 1) - if total_n_tiles > max_tiles: - raise ValueError( - f"Requested zoom results in {total_n_tiles} tiles." - f"Max allowed tiles are {max_tiles}" - f"Increase max tiles or reduce zoom level." - ) - - @field_validator("zoom_levels") - def validate_zoom_levels(cls, zoom_levels): - if zoom_levels[0] > zoom_levels[1]: - raise ValueError( - f"Order of zoom levels should be min, max. Received {zoom_levels}" - ) - return zoom_levels - - class EPSGTransformer(BaseModel): """Transformer class between different EPSG's. Useful when wanting to project in different formats. 
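The conversion used by `_latlng_to_tile` above is the standard OSM slippy-map formula and can be exercised standalone. This sketch (helper name and coordinates are illustrative, not part of the SDK) shows how a lat/lng pair maps into the unit tile square at zoom 0:

    import math

    def latlng_to_tile(lat: float, lng: float, zoom: int = 0):
        """Convert WGS84 lat/lng to fractional slippy-map tile coordinates."""
        scale = 2 ** zoom
        lat_rad = math.radians(lat)
        x = (lng + 180.0) / 360.0 * scale  # x grows eastward
        y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale  # y grows southward
        return x, y

    # At zoom 0 the whole world is a single tile, so both values fall in [0, 1).
    print(latlng_to_tile(37.7749, -122.4194))  # approx (0.1599, 0.3865)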
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py b/libs/labelbox/src/labelbox/data/annotation_types/data/video.py deleted file mode 100644 index 581801036..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py +++ /dev/null @@ -1,173 +0,0 @@ -import logging -import os -import urllib.request -from typing import Callable, Dict, Generator, Optional, Tuple -from typing_extensions import Literal -from uuid import uuid4 - -import cv2 -import numpy as np -from google.api_core import retry - -from .base_data import BaseData -from ..types import TypedArray - -from pydantic import ConfigDict, model_validator - -logger = logging.getLogger(__name__) - - -class VideoData(BaseData): - """ - Represents video - """ - - file_path: Optional[str] = None - url: Optional[str] = None - frames: Optional[Dict[int, TypedArray[Literal["uint8"]]]] = None - # Required for discriminating between data types - model_config = ConfigDict(extra="forbid") - - def load_frames(self, overwrite: bool = False) -> None: - """ - Loads all frames into memory at once in order to access in non-sequential order. - This will use a lot of memory, especially for longer videos - - Args: - overwrite: Replace existing frames - """ - if self.frames and not overwrite: - return - - for count, frame in self.frame_generator(): - if self.frames is None: - self.frames = {} - self.frames[count] = frame - - @property - def value(self): - return self.frame_generator() - - def frame_generator( - self, cache_frames=False, download_dir="/tmp" - ) -> Generator[Tuple[int, np.ndarray], None, None]: - """ - A generator for accessing individual frames in a video. - - Args: - cache_frames (bool): Whether or not to cache frames while iterating through the video. - download_dir (str): Directory to save the video to. Defaults to `/tmp` dir - """ - if self.frames is not None: - for idx, frame in self.frames.items(): - yield idx, frame - return - elif self.url and not self.file_path: - file_path = os.path.join(download_dir, f"{uuid4()}.mp4") - logger.info("Downloading the video locally to %s", file_path) - self.fetch_remote(file_path) - self.file_path = file_path - - vidcap = cv2.VideoCapture(self.file_path) - - success, frame = vidcap.read() - count = 0 - if cache_frames: - self.frames = {} - while success: - frame = frame[:, :, ::-1] - yield count, frame - if cache_frames: - self.frames[count] = frame - success, frame = vidcap.read() - count += 1 - - def __getitem__(self, idx: int) -> np.ndarray: - if self.frames is None: - raise ValueError( - "Cannot select by index without iterating over the entire video or loading all frames." - ) - return self.frames[idx] - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry(deadline=15.0) - def fetch_remote(self, local_path) -> None: - """ - Method for downloading data from self.url - - If url is not publicly accessible or requires another access pattern - simply override this function - - Args: - local_path: Where to save the thing too. - """ - urllib.request.urlretrieve(self.url, local_path) - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other video references. - - Args: - signer: A function that accepts bytes and returns a signed url. 
- Returns: - url for the video - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.frames is not None: - self.file_path = self.frames_to_video(self.frames) - self.url = self.create_url(signer) - else: - raise ValueError("One of url, file_path, frames must not be None.") - return self.url - - def frames_to_video( - self, frames: Dict[int, np.ndarray], fps=20, save_dir="/tmp" - ) -> str: - """ - Compresses the data by converting a set of individual frames to a single video. - - """ - file_path = os.path.join(save_dir, f"{uuid4()}.mp4") - out = None - for key in frames.keys(): - frame = frames[key] - if out is None: - out = cv2.VideoWriter( - file_path, - cv2.VideoWriter_fourcc(*"MP4V"), - fps, - frame.shape[:2], - ) - out.write(frame) - if out is None: - return - out.release() - return file_path - - @model_validator(mode="after") - def validate_data(self): - file_path = self.file_path - url = self.url - frames = self.frames - uid = self.uid - global_key = self.global_key - - if uid == file_path == frames == url == global_key == None: - raise ValueError( - "One of `file_path`, `frames`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"VideoData(file_path={self.file_path}," - f"frames={'...' if self.frames is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 7eef43f31..9d5b92bdd 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -6,7 +6,6 @@ from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) -from labelbox.data.annotation_types.data.tiled_image import TiledImageData from labelbox.schema import ontology from ...annotated_types import Cuid @@ -14,19 +13,6 @@ from .relationship import RelationshipAnnotation from .llm_prompt_response.prompt import PromptClassificationAnnotation from .classification import ClassificationAnswer -from .data import ( - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - ImageData, - TextData, - VideoData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, -) from .geometry import Mask from .metrics import ScalarMetric, ConfusionMatrixMetric from .video import VideoClassificationAnnotation @@ -35,22 +21,6 @@ from ..ontology import get_feature_schema_lookup from pydantic import BaseModel, field_validator, model_serializer -DataType = Union[ - VideoData, - ImageData, - TextData, - TiledImageData, - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, - GenericDataRowData, -] - class Label(BaseModel): """Container for holding data and annotations @@ -67,14 +37,13 @@ class Label(BaseModel): Args: uid: Optional Label Id in Labelbox - data: Data of Label, Image, Video, Text or dict with a single key uid | global_key | external_id. - Note use of classes as data is deprecated. Use GenericDataRowData or dict with a single key instead. + data: GenericDataRowData or dict with a single key uid | global_key | external_id. 
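For example, a minimal construction with a single-key dict (the global key value is illustrative); the validator below coerces the dict into a GenericDataRowData:

        >>> label = Label(
        ...     data={"global_key": "my-unique-global-key"},
        ...     annotations=[],
        ... )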
annotations: List of Annotations in the label extra: additional context """ uid: Optional[Cuid] = None - data: DataType + data: GenericDataRowData annotations: List[ Union[ ClassificationAnnotation, @@ -94,13 +63,6 @@ class Label(BaseModel): def validate_data(cls, data): if isinstance(data, Dict): return GenericDataRowData(**data) - elif isinstance(data, GenericDataRowData): - return data - else: - warnings.warn( - f"Using {type(data).__name__} class for label.data is deprecated. " - "Use a dict or an instance of GenericDataRowData instead." - ) return data def object_annotations(self) -> List[ObjectAnnotation]: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py index 2c3215265..86cf0d094 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, Union, Optional -from labelbox.data.annotation_types import ImageData, TextData, VideoData +from labelbox.data.annotation_types import GenericDataRowData from labelbox.data.mixins import ( ConfidenceMixin, CustomMetric, @@ -232,7 +232,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[TextData, ImageData], + data: Union[GenericDataRowData], message_id: str, confidence: Optional[float] = None, ) -> "NDText": @@ -264,7 +264,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: Union[GenericDataRowData], message_id: str, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, @@ -304,7 +304,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, message_id: str, confidence: Optional[float] = None, ) -> "NDRadio": @@ -427,7 +427,7 @@ def from_common( annotation: Union[ ClassificationAnnotation, VideoClassificationAnnotation ], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: classify_obj = cls.lookup_classification(annotation) if classify_obj is None: @@ -475,7 +475,7 @@ def to_common( def from_common( cls, annotation: Union[PromptClassificationAnnotation], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: return NDPromptText.from_common( str(annotation._uuid), diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 7039ae834..ffaefb4d7 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -14,7 +14,6 @@ ) from ...annotation_types.video import VideoObjectAnnotation, VideoMaskAnnotation from ...annotation_types.collection import LabelCollection, LabelGenerator -from ...annotation_types.data import DicomData, ImageData, TextData, VideoData from ...annotation_types.data.generic_data_row_data import GenericDataRowData from ...annotation_types.label import Label from ...annotation_types.ner import TextEntity, ConversationEntity @@ -214,46 +213,9 @@ def _generate_annotations( yield Label( annotations=annotations, - data=self._infer_media_type(group.data_row, annotations), + 
data=GenericDataRowData, ) - def _infer_media_type( - self, - data_row: DataRow, - annotations: List[ - Union[ - TextEntity, - ConversationEntity, - VideoClassificationAnnotation, - DICOMObjectAnnotation, - VideoObjectAnnotation, - ObjectAnnotation, - ClassificationAnnotation, - ScalarMetric, - ConfusionMatrixMetric, - ] - ], - ) -> Union[TextData, VideoData, ImageData]: - if len(annotations) == 0: - raise ValueError("Missing annotations while inferring media type") - - types = {type(annotation) for annotation in annotations} - data = GenericDataRowData - if (TextEntity in types) or (ConversationEntity in types): - data = TextData - elif ( - VideoClassificationAnnotation in types - or VideoObjectAnnotation in types - ): - data = VideoData - elif DICOMObjectAnnotation in types: - data = DicomData - - if data_row.id: - return data(uid=data_row.id) - else: - return data(global_key=data_row.global_key) - @staticmethod def _get_consecutive_frames( frames_indices: List[int], diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py index b28e575cf..f8b522ab5 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py @@ -1,6 +1,6 @@ from typing import Optional, Union, Type -from labelbox.data.annotation_types.data import ImageData, TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.serialization.ndjson.base import DataRow, NDJsonBase from labelbox.data.annotation_types.metrics.scalar import ( ScalarMetric, @@ -51,7 +51,7 @@ def to_common(self) -> ConfusionMatrixMetric: @classmethod def from_common( - cls, metric: ConfusionMatrixMetric, data: Union[TextData, ImageData] + cls, metric: ConfusionMatrixMetric, data: GenericDataRowData ) -> "NDConfusionMatrixMetric": return cls( uuid=metric.extra.get("uuid"), @@ -83,7 +83,7 @@ def to_common(self) -> ScalarMetric: @classmethod def from_common( - cls, metric: ScalarMetric, data: Union[TextData, ImageData] + cls, metric: ScalarMetric, data: GenericDataRowData ) -> "NDScalarMetric": return cls( uuid=metric.extra.get("uuid"), @@ -107,7 +107,7 @@ def to_common( def from_common( cls, annotation: Union[ScalarMetric, ConfusionMatrixMetric], - data: Union[TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDScalarMetric, NDConfusionMatrixMetric]: obj = cls.lookup_object(annotation) return obj.from_common(annotation, data) diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py index 74d185f45..b2dcfb5b4 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py @@ -9,6 +9,7 @@ MessageRankingTask, MessageEvaluationTaskAnnotation, ) +from ...annotation_types import GenericDataRowData class MessageTaskData(_CamelCaseMixin): @@ -35,7 +36,7 @@ def to_common(self) -> MessageEvaluationTaskAnnotation: def from_common( cls, annotation: MessageEvaluationTaskAnnotation, - data: Any, # Union[ImageData, TextData], + data: GenericDataRowData, ) -> "NDMessageTask": return cls( uuid=str(annotation._uuid), diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py index 91abface6..1bcba7a89 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py +++ 
b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Tuple, Union, Optional import base64 +from labelbox.data.annotation_types.data.raster import MaskData from labelbox.data.annotation_types.ner.conversation_entity import ( ConversationEntity, ) @@ -21,9 +22,9 @@ from PIL import Image from labelbox.data.annotation_types import feature -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData -from ...annotation_types.data import ImageData, TextData, MaskData +from ...annotation_types.data import GenericDataRowData from ...annotation_types.ner import ( DocumentEntity, DocumentTextSelection, @@ -96,7 +97,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPoint": @@ -161,7 +162,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDLine": @@ -245,7 +246,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPolygon": @@ -282,7 +283,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -329,7 +330,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -508,7 +509,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[VideoObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -545,7 +546,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[DICOMObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -601,7 +602,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDMask": @@ -706,7 +707,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDTextEntity": @@ -743,7 +744,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDDocumentEntity": @@ -778,7 +779,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = 
None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDConversationEntity": @@ -836,7 +837,7 @@ def from_common( List[List[VideoObjectAnnotation]], VideoMaskAnnotation, ], - data: Union[ImageData, TextData], + data: GenericDataRowData, ) -> Union[ NDLine, NDPoint, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py index 94c8e9879..d558ac244 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py @@ -1,7 +1,7 @@ from typing import Union from pydantic import BaseModel from .base import NDAnnotation, DataRow -from ...annotation_types.data import ImageData, TextData +from ...annotation_types.data import GenericDataRowData from ...annotation_types.relationship import RelationshipAnnotation from ...annotation_types.relationship import Relationship from .objects import NDObjectType @@ -40,7 +40,7 @@ def to_common( def from_common( cls, annotation: RelationshipAnnotation, - data: Union[ImageData, TextData], + data: GenericDataRowData, ) -> "NDRelationship": relationship = annotation.value return cls( diff --git a/libs/labelbox/src/labelbox/utils.py b/libs/labelbox/src/labelbox/utils.py index c76ce188f..dcf51be82 100644 --- a/libs/labelbox/src/labelbox/utils.py +++ b/libs/labelbox/src/labelbox/utils.py @@ -87,8 +87,8 @@ class _NoCoercionMixin: when serializing the object. Example: - class ConversationData(BaseData, _NoCoercionMixin): - class_name: Literal["ConversationData"] = "ConversationData" + class GenericDataRowData(BaseData, _NoCoercionMixin): + class_name: Literal["GenericDataRowData"] = "GenericDataRowData" """ diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 9deddc3c8..8b2627776 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -7,19 +7,21 @@ from labelbox.data.annotation_types import ( LabelGenerator, ObjectAnnotation, - ImageData, - MaskData, Line, Mask, Point, Label, + GenericDataRowData, + MaskData, ) from labelbox import OntologyBuilder, Tool @pytest.fixture def list_of_labels(): - return [Label(data=ImageData(url="http://someurl")) for _ in range(5)] + return [ + Label(data=GenericDataRowData(url="http://someurl")) for _ in range(5) + ] @pytest.fixture @@ -73,7 +75,7 @@ def test_conversion(list_of_labels): def test_adding_schema_ids(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=GenericDataRowData(uid="123456"), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -93,7 +95,7 @@ def test_adding_schema_ids(): def test_adding_urls(signer): label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData("12345"), annotations=[], ) uuid = str(uuid4()) @@ -106,7 +108,7 @@ def test_adding_urls(signer): def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData("12345"), annotations=[], ) uuid = str(uuid4()) @@ -121,7 +123,7 @@ def test_adding_to_dataset(signer): def test_adding_to_masks(signer): label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData("12345"), annotations=[ 
ObjectAnnotation( name="1234", diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 59f568c75..fb78916f4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -4,7 +4,7 @@ ClassificationAnswer, Radio, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -13,9 +13,8 @@ def test_serialization_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -41,9 +40,8 @@ def test_serialization_min(): def test_serialization_with_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -131,9 +129,8 @@ def test_serialization_with_classification(): def test_serialization_with_classification_double_nested(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -224,9 +221,8 @@ def test_serialization_with_classification_double_nested(): def test_serialization_with_classification_double_nested_2(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index d67acb9c3..4d615658c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -11,7 +11,6 @@ Mask, Label, ObjectAnnotation, - ImageData, MaskData, ) from labelbox.types import Rectangle, Polygon, Point @@ -262,7 +261,7 @@ def test_mask_from_arr(): ), ) ], - data=ImageData(uid="0" * 25), + data=GenericDataRowData(uid="0" * 25), ) res = next(NDJsonConverter.serialize([label])) res.pop("uuid") diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 4458e335c..ec57f0528 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -3,7 +3,7 @@ ClassificationAnswer, ) from labelbox.data.annotation_types.classification.classification import Radio -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -12,9 +12,8 @@ def test_serialization_with_radio_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -43,9 +42,8 @@ def test_serialization_with_radio_min(): def test_serialization_with_radio_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - 
data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 21db389cb..28eba07bd 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -2,7 +2,7 @@ from labelbox.data.annotation_types.classification.classification import ( Text, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -11,9 +11,8 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( From 30819db04d66b7809a680659c4c88823a4bdb9b7 Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:06:51 -0500 Subject: [PATCH 07/35] [PLT-1463] Removed ND deserialize from some unit test part 1 (#1804) --- .../classification_import_global_key.json | 54 -- ...conversation_entity_import_global_key.json | 25 - .../data/assets/ndjson/image_import.json | 779 +---------------- .../ndjson/image_import_global_key.json | 823 ------------------ .../assets/ndjson/image_import_name_only.json | 810 +---------------- .../ndjson/metric_import_global_key.json | 10 - .../assets/ndjson/pdf_import_global_key.json | 155 ---- .../ndjson/polyline_import_global_key.json | 36 - .../ndjson/text_entity_import_global_key.json | 26 - .../ndjson/video_import_global_key.json | 166 ---- .../serialization/ndjson/test_checklist.py | 26 - .../ndjson/test_classification.py | 108 ++- .../serialization/ndjson/test_conversation.py | 71 +- .../serialization/ndjson/test_data_gen.py | 80 +- .../data/serialization/ndjson/test_dicom.py | 26 - .../serialization/ndjson/test_document.py | 294 ++++++- .../ndjson/test_export_video_objects.py | 32 +- .../serialization/ndjson/test_free_text.py | 26 - .../serialization/ndjson/test_global_key.py | 125 +-- .../data/serialization/ndjson/test_image.py | 203 ++++- 20 files changed, 769 insertions(+), 3106 deletions(-) delete mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json deleted file mode 100644 index 4de15e217..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json +++ /dev/null @@ -1,54 +0,0 @@ -[ - { - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - 
"confidence": 0.8, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" - }, - { - "answer": [ - { - "schemaId": "ckrb1sfl8099e0y919v260awv", - "confidence": 0.82, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } - ], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json deleted file mode 100644 index 83a95e5bf..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json +++ /dev/null @@ -1,25 +0,0 @@ -[{ - "location": { - "start": 67, - "end": 128 - }, - "messageId": "some-message-id", - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] -}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 91563b8ae..75fe36e44 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,16 +8,17 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - } + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + }, + "classifications": [] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -28,20 +29,17 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + }, + "classifications": [] }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -50,762 +48,39 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, 
- { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 
907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 + "x": 10.0, + "y": 20.0 }, { - "x": 1119, - "y": 934 + "x": 15.0, + "y": 20.0 }, { - "x": 1118, - "y": 935 + "x": 20.0, + "y": 25.0 }, { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json deleted file mode 100644 index 591e40cf6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json +++ /dev/null @@ -1,823 +0,0 @@ -[ - { - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "schemaId": "ckrazcueb16og0z6609jj7y3y", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.851, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - { - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "schemaId": "ckrazcuec16ok0z66f956apb7", - "dataRow": { - "globalKey": 
"05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.834, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } - }, - { - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "schemaId": "ckrazcuec16oi0z66dzrd8pfl", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.986, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "polygon": [ - { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - 
"y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 - } - ] - }, - { - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "schemaId": "ckrazcuec16om0z66bhhh4tp7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "point": { - "x": 2122, - 
"y": 1457 - } - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 82be4cdab..466a03594 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,826 +1,86 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "box a", + "name": "ckrazcueb16og0z6609jj7y3y", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "confidence": 0.854, + "classifications": [], + "confidence": 0.851, "customMetrics": [ { "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.7 + "value": 0.4 } - ] + ], + "bbox": { + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "mask a", + "name": "ckrazcuec16ok0z66f956apb7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - }, - "confidence": 0.685, + "classifications": [], + "confidence": 0.834, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 - }, - { - "name": "customMetric2", - "value": 0.9 + "value": 0.3 } - ] + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + } }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "polygon a", + "name": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.71, + "confidence": 0.986, "customMetrics": [ { "name": "customMetric1", - "value": 0.1 + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 
927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 + "x": 10.0, + "y": 20.0 }, { - "x": 1099, - "y": 911 + "x": 15.0, + "y": 20.0 }, { - "x": 1100, - "y": 911 + "x": 20.0, + "y": 25.0 }, { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, 
- "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "point a", + "name": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.77, - "customMetrics": [ - { - "name": "customMetric2", - "value": 1.2 - } - ], "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json deleted file mode 100644 index 31be5a4c7..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "aggregation": "ARITHMETIC_MEAN", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "metricValue": 0.1 - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json deleted file mode 100644 index f4b4894f6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json +++ /dev/null @@ -1,155 +0,0 @@ -[{ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 4, - "unit": "POINTS", - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 162.73, - "left": 32.45, - "height": 388.16999999999996, - "width": 101.66000000000001 - } -}, { - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "bbox": { - "top": 223.26, - "left": 251.42, - "height": 457.03999999999996, - "width": 186.78 - } -}, { - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 6, - "unit": "POINTS", - "confidence": 0.99, - 
"customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 32.52, - "left": 218.17, - "height": 231.73, - "width": 110.56000000000003 - } -}, { - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "confidence": 0.89, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 117.39, - "left": 4.25, - "height": 456.9200000000001, - "width": 164.83 - } -}, { - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 8, - "unit": "POINTS", - "bbox": { - "top": 82.13, - "left": 217.28, - "height": 279.76, - "width": 82.43000000000004 - } -}, { - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 3, - "unit": "POINTS", - "bbox": { - "top": 298.12, - "left": 83.34, - "height": 203.83000000000004, - "width": 0.37999999999999545 - } -}, -{ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "named_entity", - "classifications": [], - "textSelections": [ - { - "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - "tokenIds": [ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c" - ], - "page": 1 - } - ] -} -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json deleted file mode 100644 index d6a9eecbd..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json +++ /dev/null @@ -1,36 +0,0 @@ -[ - { - "line": [ - { - "x": 2534.353, - "y": 249.471 - }, - { - "x": 2429.492, - "y": 182.092 - }, - { - "x": 2294.322, - "y": 221.962 - } - ], - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-line", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.58, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json deleted file mode 100644 index 1f26d8dc8..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "location": { - "start": 67, - "end": 128 - }, - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - 
"classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json deleted file mode 100644 index 11e0753d9..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json +++ /dev/null @@ -1,166 +0,0 @@ -[{ - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" - }, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{ - "start": 30, - "end": 35 - }, { - "start": 50, - "end": 51 - }] -}, { - "answer": [{ - "schemaId": "ckrb1sfl8099e0y919v260awv" - }], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{ - "start": 0, - "end": 5 - }] -}, { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" -}, { - "classifications": [], - "schemaId": - "cl5islwg200gfci6g0oitaypu", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [{ - "keyframes": [{ - "frame": 1, - "line": [{ - "x": 10.0, - "y": 10.0 - }, { - "x": 100.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }, { - "frame": 5, - "line": [{ - "x": 15.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 8, - "line": [{ - "x": 100.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [{ - "keyframes": [{ - "frame": 1, - "point": { - "x": 10.0, - "y": 10.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 5, - "point": { - "x": 50.0, - "y": 50.0 - }, - "classifications": [] - }, { - "frame": 10, - "point": { - "x": 10.0, - "y": 50.0 - }, - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5iw0roz00lwci6g5jni62vs", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [{ - "keyframes": [{ - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0 - }, - "classifications": [] - }, { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0 - }, - "classifications": [] - }] - }] -}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 0bc3c8924..59f568c75 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -37,13 +37,6 @@ def 
test_serialization_min(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - for i, annotation in enumerate(res.annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_serialization_with_classification(): label = Label( @@ -134,12 +127,6 @@ def test_serialization_with_classification(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - assert label.model_dump(exclude_none=True) == label.model_dump( - exclude_none=True - ) - def test_serialization_with_classification_double_nested(): label = Label( @@ -233,13 +220,6 @@ def test_serialization_with_classification_double_nested(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - res.annotations[0].extra.pop("uuid") - assert label.model_dump(exclude_none=True) == label.model_dump( - exclude_none=True - ) - def test_serialization_with_classification_double_nested_2(): label = Label( @@ -330,9 +310,3 @@ def test_serialization_with_classification_double_nested_2(): res = next(serialized) res.pop("uuid") assert res == expected - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - assert label.model_dump(exclude_none=True) == label.model_dump( - exclude_none=True - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py index 8dcb17f0b..82adce99c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py @@ -1,15 +1,73 @@ import json +from labelbox.data.annotation_types.classification.classification import ( + Checklist, + Radio, + Text, +) +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ClassificationAnnotation, + ClassificationAnswer, +) +from labelbox.data.mixins import CustomMetric + def test_classification(): with open( "tests/data/assets/ndjson/classification_import.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + label = Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.8, + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.82, + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"}, + value=Text(answer="a value"), + ), + ], + ) + 
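+    # The Label above is a hand-built equivalent of classification_import.json,
+    # so serializing it should reproduce the fixture verbatim; this stands in
+    # for the NDJsonConverter.deserialize round-trip removed by this patch.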
+ res = list(NDJsonConverter.serialize([label])) assert res == data @@ -18,6 +76,48 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + label = Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + name="classification a", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="choice 1", + ), + ), + ), + ClassificationAnnotation( + name="classification b", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.945, + name="choice 2", + ) + ], + ), + ), + ClassificationAnnotation( + name="classification c", + extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index f7da9181b..561f9ce86 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,8 +1,12 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -99,25 +103,62 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - deserialized_label = list(NDJsonConverter().deserialize(ndjson)) - deserialized_label[0].annotations[0].extra.pop("uuid") - assert deserialized_label[0].model_dump(exclude_none=True) == label[ - 0 - ].model_dump(exclude_none=True) +def test_conversation_entity_import(): + with open( + "tests/data/assets/ndjson/conversation_entity_import.json", "r" + ) as file: + data = json.load(file) -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/conversation_entity_import.json", + label = lb_types.Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) + assert res == data + + +def test_conversation_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - ], -) -def test_conversation_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - 
res = list(NDJsonConverter.serialize(res)) + label = lb_types.Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, extra={}, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py index 333c00250..999e1bda5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py @@ -1,67 +1,29 @@ -from copy import copy -import pytest import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import ( - NDDicomSegments, - NDDicomSegment, - NDDicomLine, -) - -""" -Data gen prompt test data -""" - -prompt_text_annotation = lb_types.PromptClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - name="test", - value=lb_types.PromptText( - answer="the answer to the text questions right here" - ), -) - -prompt_text_ndjson = { - "answer": "the answer to the text questions right here", - "name": "test", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, -} - -data_gen_label = lb_types.Label( - data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - annotations=[prompt_text_annotation], -) - -""" -Prompt annotation test -""" def test_serialize_label(): - serialized_label = next(NDJsonConverter().serialize([data_gen_label])) - # Remove uuid field since this is a random value that can not be specified also meant for relationships - del serialized_label["uuid"] - assert serialized_label == prompt_text_ndjson - - -def test_deserialize_label(): - deserialized_label = next( - NDJsonConverter().deserialize([prompt_text_ndjson]) + prompt_text_annotation = lb_types.PromptClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + name="test", + extra={"uuid": "test"}, + value=lb_types.PromptText( + answer="the answer to the text questions right here" + ), ) - if hasattr(deserialized_label.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to match - deserialized_label.annotations[0].extra = {} - assert deserialized_label.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + prompt_text_ndjson = { + "answer": "the answer to the text questions right here", + "name": "test", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "test", + } + + data_gen_label = lb_types.Label( + data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + annotations=[prompt_text_annotation], + ) + serialized_label = next(NDJsonConverter().serialize([data_gen_label])) -def test_serialize_deserialize_label(): - serialized = list(NDJsonConverter.serialize([data_gen_label])) - deserialized = next(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to match - deserialized.annotations[0].extra = {} - assert deserialized.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + assert serialized_label == 
prompt_text_ndjson diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 633214367..762891aa2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,6 +1,5 @@ from copy import copy import pytest -import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -181,28 +180,3 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson - - -@pytest.mark.parametrize("label, ndjson", labels_ndjsons) -def test_deserialize_label(label, ndjson): - deserialized_label = next(NDJsonConverter().deserialize([ndjson])) - if hasattr(deserialized_label.annotations[0], "extra"): - deserialized_label.annotations[0].extra = {} - for i, annotation in enumerate(deserialized_label.annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value - - -@pytest.mark.parametrize("label", labels) -def test_serialize_deserialize_label(label): - serialized = list(NDJsonConverter.serialize([label])) - deserialized = list(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized[0].annotations[0], "extra"): - deserialized[0].annotations[0].extra = {} - for i, annotation in enumerate(deserialized[0].annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index 5fe6a9789..a0897ad9f 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,6 +1,19 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + RectangleUnit, + Point, + DocumentRectangle, + DocumentEntity, + DocumentTextSelection, +) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -53,10 +66,144 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + 
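+                    # The DocumentRectangle corners are derived from the fixture's
+                    # bbox: start = (left, top) and end = (left + width, top + height).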
value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.89, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_with_name_only(): @@ -65,26 +212,135 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id=None, + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + 
), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.74, + name="boxy", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson - - -def test_pdf_bbox_deserialize(): - deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) - deserialized[0].annotations[0].extra = {} - assert ( - deserialized[0].annotations[0].value - == bbox_labels[0].annotations[0].value - ) - assert ( - deserialized[0].annotations[0].name - == bbox_labels[0].annotations[0].name - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 4adcd9935..1ab678cde 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,8 +9,6 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=VideoData( uid="cklr9mr4m5iao0rb6cvxu4qbn", - file_path=None, - frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -22,6 +20,7 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", + "uuid": "b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, value=Rectangle( extra={}, @@ -588,31 +587,4 @@ def test_serialize_video_objects(): serialized_labels = NDJsonConverter.serialize([label]) label = 
next(serialized_labels) - manual_label = video_serialized_bbox_label() - - for key in label.keys(): - # ignore uuid because we randomize if there was none - if key != "uuid": - assert label[key] == manual_label[key] - - assert len(label["segments"]) == 2 - assert len(label["segments"][0]["keyframes"]) == 2 - assert len(label["segments"][1]["keyframes"]) == 4 - - # #converts back only the keyframes. should be the sum of all prev segments - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - assert len(label.annotations) == 6 - - -def test_confidence_is_ignored(): - label = video_bbox_label() - serialized_labels = NDJsonConverter.serialize([label]) - label = next(serialized_labels) - label["confidence"] = 0.453 - label["segments"][0]["confidence"] = 0.453 - - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - for annotation in label.annotations: - assert annotation.confidence is None + assert label == video_serialized_bbox_label() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 84c017497..349be13a8 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -34,16 +34,6 @@ def test_serialization(): assert res["answer"] == "text_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "text_answer" - assert annotation_value.confidence == 0.5 - def test_nested_serialization(): label = Label( @@ -102,19 +92,3 @@ def test_nested_serialization(): assert sub_classification["name"] == "nested answer" assert sub_classification["answer"] == "nested answer" assert sub_classification["confidence"] == 0.7 - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - answer = annotation.value.answer[0] - assert answer.confidence == 0.9 - assert answer.name == "first_answer" - - classification_answer = answer.classifications[0].value.answer - assert classification_answer.confidence == 0.8 - assert classification_answer.name == "first_sub_radio_answer" - - sub_classification_answer = classification_answer.classifications[0].value - assert type(sub_classification_answer) is Text - assert sub_classification_answer.answer == "nested answer" - assert sub_classification_answer.confidence == 0.7 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py index 2b3fa7f8c..d104a691e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py @@ -1,73 +1,74 @@ -import json -import pytest - -from labelbox.data.serialization.ndjson.classification import NDRadio - +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import NDLine - - -def round_dict(data): - if isinstance(data, dict): - for key in data: - if isinstance(data[key], float): - data[key] = int(data[key]) - elif isinstance(data[key], dict): - data[key] = 
round_dict(data[key]) - elif isinstance(data[key], (list, tuple)): - data[key] = [round_dict(r) for r in data[key]] +from labelbox.types import ( + Label, + ClassificationAnnotation, + Radio, + ClassificationAnswer, +) - return data +def test_generic_data_row_global_key_included(): + expected = [ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + } + ] -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/classification_import_global_key.json", - "tests/data/assets/ndjson/metric_import_global_key.json", - "tests/data/assets/ndjson/polyline_import_global_key.json", - "tests/data/assets/ndjson/text_entity_import_global_key.json", - "tests/data/assets/ndjson/conversation_entity_import_global_key.json", - ], -) -def test_many_types(filename: str): - with open(filename, "r") as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == data - f.close() + label = Label( + data=GenericDataRowData( + global_key="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ) + res = list(NDJsonConverter.serialize([label])) -def test_image(): - with open( - "tests/data/assets/ndjson/image_import_global_key.json", "r" - ) as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() + assert res == expected -def test_pdf(): - with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() +def test_dict_data_row_global_key_included(): + expected = [ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + } + ] + label = Label( + data={ + "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct", + }, + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ) -def test_video(): - with open( - "tests/data/assets/ndjson/video_import_global_key.json", "r" - ) as f: - data = json.load(f) + res = list(NDJsonConverter.serialize([label])) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] - f.close() + assert res == expected diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index 1729e1f46..d67acb9c3 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,4 +1,8 @@ import json +from 
labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -10,6 +14,7 @@ ImageData, MaskData, ) +from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -29,12 +34,74 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_image_with_name_only(): @@ -43,11 +110,74 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + name="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + name="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + 
CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + name="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + name="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_mask(): @@ -57,10 +187,11 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], + "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -68,16 +199,54 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [255, 0, 0], + "colorRGB": (255, 0, 0), }, + "classifications": [], }, ] - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) + mask_numpy = mask_numpy.astype(np.uint8) + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.8, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Mask( + mask=MaskData(arr=mask_numpy), + color=(1, 1, 1), + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + extra={}, + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=(255, 0, 0), + ), + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + + assert res == data def test_mask_from_arr(): From 761b1e9643cc3b0d02ee762f950051e7e0d3e6e5 Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:08:20 -0500 Subject: [PATCH 08/35] [PLT-1463] Removed ND deserialize from some unit test part 2 (#1815) --- .../data/serialization/ndjson/test_metric.py | 170 +++- .../data/serialization/ndjson/test_mmc.py | 125 ++- .../ndjson/test_ndlabel_subclass_matching.py | 19 - .../data/serialization/ndjson/test_nested.py | 236 ++++- .../serialization/ndjson/test_polyline.py | 82 +- .../data/serialization/ndjson/test_radio.py | 16 - .../serialization/ndjson/test_rectangle.py | 43 +- .../serialization/ndjson/test_relationship.py | 151 ++- 
.../data/serialization/ndjson/test_text.py | 10 - .../serialization/ndjson/test_text_entity.py | 69 +- .../data/serialization/ndjson/test_video.py | 868 +++++++++++++++++- 11 files changed, 1593 insertions(+), 196 deletions(-) delete mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 45c5c67bf..40e098405 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,38 +1,166 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.annotation_types.metrics.confusion_matrix import ( + ConfusionMatrixMetric, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ScalarMetric, + ScalarMetricAggregation, + ConfusionMatrixAggregation, +) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert reserialized == data + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + assert res == data def test_custom_scalar_metric(): - with open( - "tests/data/assets/ndjson/custom_scalar_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: 0.1, 0.2: 0.5}, + "metricName": "custom_iou", + "aggregation": "SUM", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value=0.1, + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value={"0.1": 0.1, "0.2": 0.5}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert res == data def test_custom_confusion_matrix_metric(): - with open( - 
"tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (1, 1, 2, 3), + "metricName": "50%_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (0, 1, 2, 5), + "metricName": "50%_iou", + "featureName": "sample_class", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, + "metricName": "50%_iou", + "aggregation": "CONFUSION_MATRIX", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ConfusionMatrixMetric( + value=(1, 1, 2, 3), + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value=(0, 1, 2, 5), + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert data == res diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 69594ff73..202f793fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,32 +1,125 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest from labelbox.data.serialization import NDJsonConverter +from labelbox.types import ( + Label, + MessageEvaluationTaskAnnotation, + MessageSingleSelectionTask, + MessageMultiSelectionTask, + MessageInfo, + OrderedMessageInfo, + MessageRankingTask, +) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - deserialized = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(deserialized)) + labels = [ + Label( + data=GenericDataRowData( + uid="cnjencjencjfencvj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="single-selection", + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + value=MessageSingleSelectionTask( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + parent_message_id="clxfznjb800073b6v43ppx9ca", + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cfcerfvergerfefj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="multi-selection", + extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, + value=MessageMultiSelectionTask( + 
parent_message_id="clxfznjb800073b6v43ppx9ca", + selected_messages=[ + MessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + ) + ], + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=2, + ), + ], + ), + ) + ], + ), + ] - assert data == reserialized + res = list(NDJsonConverter.serialize(labels)) + assert res == data -def test_mesage_ranking_task_wrong_order_serialization(): - with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: - data = json.load(file) - - some_ranking_task = next( - task - for task in data - if task["messageEvaluationTask"]["format"] == "message-ranking" - ) - some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ - "order" - ] = 3 +def test_mesage_ranking_task_wrong_order_serialization(): with pytest.raises(ValueError): - list(NDJsonConverter.deserialize([some_ranking_task])) + ( + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={ + "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" + }, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + ], + ), + ) + ], + ), + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py deleted file mode 100644 index 790bd87b3..000000000 --- a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from labelbox.data.serialization.ndjson.label import NDLabel -from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle -import pytest - - -def test_bad_annotation_input(): - data = [{"test": 3}] - with pytest.raises(ValueError): - NDLabel(**{"annotations": data}) - - -def test_correct_annotation_input(): - with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: - data = json.load(f) - assert isinstance( - NDLabel(**{"annotations": [data[0]]}).annotations[0], - NDDocumentRectangle, - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index e0f0df0e6..3633c9cbe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,13 +1,135 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Rectangle, + Point, + ClassificationAnnotation, + Radio, + ClassificationAnswer, + Text, + Checklist, +) 
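+# The Label fixtures built in the tests below are hand-written equivalents of
+# the nested_import*.json assets; they stand in for the
+# NDJsonConverter.deserialize round-trip that this patch removes.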
def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.34, + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "5d03213e-4408-456c-9eca-cf0723202961", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.894, + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={}, + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -16,6 +138,112 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="box a", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.811, + name="first answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box b", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification b", + 
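+                        # The answer's confidence and custom metrics mirror
+                        # nested_import_name_only.json.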
value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.815, + name="second answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification c", + value=Checklist( + answer=[ + ClassificationAnswer( + name="third answer", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="a string", + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index 97d48a14e..cd11d97fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,18 +1,76 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ObjectAnnotation, Point, Line, Label -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/polyline_without_confidence_import.json", - "tests/data/assets/ndjson/polyline_import.json", - ], -) -def test_polyline_import(filename: str): - with open(filename, "r") as file: +def test_polyline_import_with_confidence(): + with open( + "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" + ) as file: + data = json.load(file) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data + + +def test_polyline_import_without_confidence(): + with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.58, + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 
bd80f9267..4458e335c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,4 +1,3 @@ -import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -40,14 +39,6 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - for i, annotation in enumerate(res.annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_serialization_with_radio_classification(): label = Label( @@ -101,10 +92,3 @@ def test_serialization_with_radio_classification(): res = next(serialized) res.pop("uuid") assert res == expected - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - res.annotations[0].extra.pop("uuid") - assert res.annotations[0].model_dump( - exclude_none=True - ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 66630dbb5..0e42ab152 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,6 +1,10 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -8,8 +12,26 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="bbox", + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + }, + value=Rectangle( + start=Point(x=38.0, y=28.0), + end=Point(x=81.0, y=69.0), + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -39,8 +61,6 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, }, ) @@ -48,8 +68,9 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + + assert res == data def test_rectangle_mixed_start_end_points(): @@ -76,17 +97,13 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, - }, + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + assert res == data def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py 
b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index f33719035..235b66957 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,16 +1,135 @@ import json -from uuid import uuid4 -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Point, + Rectangle, + RelationshipAnnotation, + Relationship, +) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) + res = [ + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + extra={}, + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl-DIFFERENT", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + ] res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -44,29 +163,3 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] - - -def test_relationship_nonexistent_object(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - relationship_annotation = data[2] - source_uuid = relationship_annotation["relationship"]["source"] - target_uuid = str(uuid4()) - relationship_annotation["relationship"]["target"] = target_uuid - error_msg 
= f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" - - with pytest.raises(ValueError, match=error_msg): - list(NDJsonConverter.deserialize(data)) - - -def test_relationship_duplicate_uuids(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - source, target = data[0], data[1] - target["uuid"] = source["uuid"] - error_msg = f"UUID '{source['uuid']}' is not unique" - - with pytest.raises(AssertionError, match=error_msg): - list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index d5e81c51a..21db389cb 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,7 +1,5 @@ from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( - ClassificationAnswer, - Radio, Text, ) from labelbox.data.annotation_types.data.text import TextData @@ -34,11 +32,3 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index 3e856f001..fb93f15d4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,21 +1,68 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, TextEntity + + +def test_text_entity_import(): + with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: + data = json.load(file) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/text_entity_import.json", +def test_text_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - ], -) -def test_text_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-text-entity", + 
feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index c7a6535c4..4fba5c2ca 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,10 +1,10 @@ import json -from labelbox.client import Client from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, Radio, + Text, ) from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.geometry.line import Line @@ -13,8 +13,10 @@ from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.video import VideoObjectAnnotation -from labelbox import parser +from labelbox.data.annotation_types.video import ( + VideoClassificationAnnotation, + VideoObjectAnnotation, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter from operator import itemgetter @@ -24,15 +26,275 @@ def test_video(): with open("tests/data/assets/ndjson/video_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + 
feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=1, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=5, + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + 
segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_name_only(): @@ -40,16 +302,274 @@ def test_video_name_only(): "tests/data/assets/ndjson/video_import_name_only.json", "r" ) as file: data = json.load(file) - - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=1, + ), + VideoClassificationAnnotation( + name="question 2", + 
extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=5, + ), + ClassificationAnnotation( + name="question 3", + extra={"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_classification_global_subclassifications(): @@ -67,7 +587,6 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( - name="checklist", answer=[ ClassificationAnswer( 
name="first_checklist_answer", @@ -94,7 +613,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = nested_checklist_annotation_ndjson = { + expected_second_annotation = { "name": "nested_checklist_question", "answer": [ { @@ -116,12 +635,6 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - assert annotation.name == label.annotations[i].name - def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -287,14 +800,6 @@ def test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_video_classification_point(): bbox_annotation = [ @@ -445,13 +950,6 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - def test_video_classification_frameline(): bbox_annotation = [ @@ -619,9 +1117,289 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value + +[ + { + "answer": "a value", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 5, "start": 0}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5islwg200gfci6g0oitaypu", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + { + "classifications": [], + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + } + ] + }, + ], + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "segments": [ 
+ { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + } + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + }, + { + "classifications": [], + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + }, + ] + }, + ], + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "segments": [ + { + "keyframes": [ + { + "bbox": { + "height": 100.0, + "left": 5.0, + "top": 10.0, + "width": 150.0, + }, + "classifications": [], + "frame": 1, + }, + { + "bbox": { + "height": 50.0, + "left": 5.0, + "top": 30.0, + "width": 150.0, + }, + "classifications": [], + "frame": 5, + }, + ] + }, + { + "keyframes": [ + { + "bbox": { + "height": 400.0, + "left": 200.0, + "top": 300.0, + "width": 150.0, + }, + "classifications": [], + "frame": 10, + } + ] + }, + ], + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + }, +] + +[ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{"start": 0, "end": 5}], + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "classifications": [], + "schemaId": "cl5islwg200gfci6g0oitaypu", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + { + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + } + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + "classifications": [], + } + ] + }, + { + "keyframes": [ + { + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + "classifications": [], + }, + { + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + "classifications": [], + }, + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0, + }, + "classifications": [], + }, + { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0, + }, + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0, + }, + "classifications": 
[], + } + ] + }, + ], + }, +] From 379171a7a50c9b472962718414ccf1b6b69a4a33 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 17 Sep 2024 12:10:48 -0700 Subject: [PATCH 09/35] [PLT-1274] Vb/deprecate bulkimportrequest plt 1274 (#1821) --- libs/labelbox/src/labelbox/__init__.py | 1 - libs/labelbox/src/labelbox/orm/model.py | 1 - .../labelbox/schema/bulk_import_request.py | 1004 ----------------- libs/labelbox/src/labelbox/schema/enums.py | 25 - libs/labelbox/src/labelbox/schema/project.py | 119 +- .../test_bulk_import_request.py | 258 ----- .../test_ndjson_validation.py | 36 - 7 files changed, 6 insertions(+), 1438 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py delete mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 5b5ac1f67..f9b82b422 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -6,7 +6,6 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig -from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( MALPredictionImport, MEAPredictionImport, diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py index 84dcac774..1f3ee1d86 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -386,7 +386,6 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] - BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py deleted file mode 100644 index 8e11f3261..000000000 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ /dev/null @@ -1,1004 +0,0 @@ -import json -import time -from uuid import UUID, uuid4 -import functools - -import logging -from pathlib import Path -from google.api_core import retry -from labelbox import parser -import requests -from pydantic import ( - ValidationError, - BaseModel, - Field, - field_validator, - model_validator, - ConfigDict, - StringConstraints, -) -from typing_extensions import Literal, Annotated -from typing import ( - Any, - List, - Optional, - BinaryIO, - Dict, - Iterable, - Tuple, - Union, - Type, - Set, - TYPE_CHECKING, -) - -from labelbox import exceptions as lb_exceptions -from labelbox import utils -from labelbox.orm import query -from labelbox.orm.db_object import DbObject -from labelbox.orm.model import Relationship -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.serialization import serialize_labels -from labelbox.orm.model import Field as lb_Field - -if TYPE_CHECKING: - from labelbox import Project - from labelbox.types import Label - -NDJSON_MIME_TYPE = "application/x-ndjson" -logger = logging.getLogger(__name__) - -# TODO: Deprecate this library in place of labelimport and malprediction import library. 
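The TODO above is what this patch carries out: rather than migrating the module in place, it deletes it, and uploads go through the annotation-import classes instead. A minimal migration sketch, assuming the MALPredictionImport surface the current SDK exposes (create_from_objects plus the wait_until_done and errors helpers); the API key, project id, and prediction values below are placeholders, the latter lifted from the docstring example later in this file:

    from labelbox import Client, MALPredictionImport

    client = Client(api_key="<api-key>")  # placeholder credentials

    # The same NDJSON-shaped dicts that BulkImportRequest.create_from_objects accepted.
    predictions = [
        {
            "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
            "schemaId": "ckappz7d700gn0zbocmqkwd9i",
            "dataRow": {"id": "ck1s02fqxm8fi0757f0e6qtdc"},
            "bbox": {"top": 48, "left": 58, "height": 865, "width": 1512},
        }
    ]

    upload = MALPredictionImport.create_from_objects(
        client=client,
        project_id="<project-id>",  # placeholder
        name="mal-import-1",
        predictions=predictions,
    )
    upload.wait_until_done()    # blocks until the server finishes processing
    assert upload.errors == []  # per-annotation failures, if any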
- - -def _determinants(parent_cls: Any) -> List[str]: - return [ - k - for k, v in parent_cls.model_fields.items() - if v.json_schema_extra and "determinant" in v.json_schema_extra - ] - - -def _make_file_name(project_id: str, name: str) -> str: - return f"{project_id}__{name}.ndjson" - - -# TODO(gszpak): move it to client.py -def _make_request_data( - project_id: str, name: str, content_length: int, file_name: str -) -> dict: - query_str = """mutation createBulkImportRequestFromFilePyApi( - $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - filePayload: { - file: $file, - contentLength: $contentLength - } - }) { - %s - } - } - """ % query.results_query_part(BulkImportRequest) - variables = { - "projectId": project_id, - "name": name, - "file": None, - "contentLength": content_length, - } - operations = json.dumps({"variables": variables, "query": query_str}) - - return { - "operations": operations, - "map": (None, json.dumps({file_name: ["variables.file"]})), - } - - -def _send_create_file_command( - client, - request_data: dict, - file_name: str, - file_data: Tuple[str, Union[bytes, BinaryIO], str], -) -> dict: - response = client.execute(data=request_data, files={file_name: file_data}) - - if not response.get("createBulkImportRequest", None): - raise lb_exceptions.LabelboxError( - "Failed to create BulkImportRequest, message: %s" - % response.get("errors", None) - or response.get("error", None) - ) - - return response - - -class BulkImportRequest(DbObject): - """Represents the import job when importing annotations. - - Attributes: - name (str) - state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) - input_file_url (str): URL to your web-hosted NDJSON file - error_file_url (str): NDJSON that contains error messages for failed annotations - status_file_url (str): NDJSON that contains status for each annotation - created_at (datetime): UTC timestamp for date BulkImportRequest was created - - project (Relationship): `ToOne` relationship to Project - created_by (Relationship): `ToOne` relationship to User - """ - - name = lb_Field.String("name") - state = lb_Field.Enum(BulkImportRequestState, "state") - input_file_url = lb_Field.String("input_file_url") - error_file_url = lb_Field.String("error_file_url") - status_file_url = lb_Field.String("status_file_url") - created_at = lb_Field.DateTime("created_at") - - project = Relationship.ToOne("Project") - created_by = Relationship.ToOne("User", False, "created_by") - - @property - def inputs(self) -> List[Dict[str, Any]]: - """ - Inputs for each individual annotation uploaded. - This should match the ndjson annotations that you have uploaded. - - Returns: - Uploaded ndjson. - - * This information will expire after 24 hours. - """ - return self._fetch_remote_ndjson(self.input_file_url) - - @property - def errors(self) -> List[Dict[str, Any]]: - """ - Errors for each individual annotation uploaded. This is a subset of statuses - - Returns: - List of dicts containing error messages. Empty list means there were no errors - See `BulkImportRequest.statuses` for more details. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.error_file_url) - - @property - def statuses(self) -> List[Dict[str, Any]]: - """ - Status for each individual annotation uploaded. - - Returns: - A status for each annotation if the upload is done running. 
- See below table for more details - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - Field - - Description - * - uuid - - Specifies the annotation for the status row. - * - dataRow - - JSON object containing the Labelbox data row ID for the annotation. - * - status - - Indicates SUCCESS or FAILURE. - * - errors - - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.status_file_url) - - @functools.lru_cache() - def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: - """ - Fetches the remote ndjson file and caches the results. - - Args: - url (str): Can be any url pointing to an ndjson file. - Returns: - ndjson as a list of dicts. - """ - response = requests.get(url) - response.raise_for_status() - return parser.loads(response.text) - - def refresh(self) -> None: - """Synchronizes values of all fields with the database.""" - query_str, params = query.get_single(BulkImportRequest, self.uid) - res = self.client.execute(query_str, params) - res = res[utils.camel_case(BulkImportRequest.type_name())] - self._set_field_values(res) - - def wait_till_done(self, sleep_time_seconds: int = 5) -> None: - self.wait_until_done(sleep_time_seconds) - - def wait_until_done(self, sleep_time_seconds: int = 5) -> None: - """Blocks import job until certain conditions are met. - - Blocks until the BulkImportRequest.state changes either to - `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, - periodically refreshing object's state. - - Args: - sleep_time_seconds (str): a time to block between subsequent API calls - """ - while self.state == BulkImportRequestState.RUNNING: - logger.info(f"Sleeping for {sleep_time_seconds} seconds...") - time.sleep(sleep_time_seconds) - self.__exponential_backoff_refresh() - - @retry.Retry( - predicate=retry.if_exception_type( - lb_exceptions.ApiLimitError, - lb_exceptions.TimeoutError, - lb_exceptions.NetworkError, - ) - ) - def __exponential_backoff_refresh(self) -> None: - self.refresh() - - @classmethod - def from_name( - cls, client, project_id: str, name: str - ) -> "BulkImportRequest": - """Fetches existing BulkImportRequest. - - Args: - client (Client): a Labelbox client - project_id (str): BulkImportRequest's project id - name (str): name of BulkImportRequest - Returns: - BulkImportRequest object - - """ - query_str = """query getBulkImportRequestPyApi( - $projectId: ID!, $name: String!) { - bulkImportRequest(where: { - projectId: $projectId, - name: $name - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name} - response = client.execute(query_str, params=params) - return cls(client, response["bulkImportRequest"]) - - @classmethod - def create_from_url( - cls, client, project_id: str, name: str, url: str, validate=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a publicly accessible URL - to an ndjson file with predictions. 
- - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - url (str): publicly accessible URL pointing to ndjson file containing predictions - validate (bool): a flag indicating if there should be a validation - if `url` is valid ndjson - Returns: - BulkImportRequest object - """ - if validate: - logger.warn( - "Validation is turned on. The file will be downloaded locally and processed before uploading." - ) - res = requests.get(url) - data = parser.loads(res.text) - _validate_ndjson(data, client.get_project(project_id)) - - query_str = """mutation createBulkImportRequestPyApi( - $projectId: ID!, $name: String!, $fileUrl: String!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - fileUrl: $fileUrl - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name, "fileUrl": url} - bulk_import_request_response = client.execute(query_str, params=params) - return cls( - client, bulk_import_request_response["createBulkImportRequest"] - ) - - @classmethod - def create_from_objects( - cls, - client, - project_id: str, - name: str, - predictions: Union[Iterable[Dict], Iterable["Label"]], - validate=True, - ) -> "BulkImportRequest": - """ - Creates a `BulkImportRequest` from an iterable of dictionaries. - - Conforms to JSON predictions format, e.g.: - ``{ - "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", - "schemaId": "ckappz7d700gn0zbocmqkwd9i", - "dataRow": { - "id": "ck1s02fqxm8fi0757f0e6qtdc" - }, - "bbox": { - "top": 48, - "left": 58, - "height": 865, - "width": 1512 - } - }`` - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - predictions (Iterable[dict]): iterable of dictionaries representing predictions - validate (bool): a flag indicating if there should be a validation - if `predictions` is valid ndjson - Returns: - BulkImportRequest object - """ - if not isinstance(predictions, list): - raise TypeError( - f"annotations must be in a form of Iterable. Found {type(predictions)}" - ) - ndjson_predictions = serialize_labels(predictions) - - if validate: - _validate_ndjson(ndjson_predictions, client.get_project(project_id)) - - data_str = parser.dumps(ndjson_predictions) - if not data_str: - raise ValueError("annotations cannot be empty") - - data = data_str.encode("utf-8") - file_name = _make_file_name(project_id, name) - request_data = _make_request_data( - project_id, name, len(data_str), file_name - ) - file_data = (file_name, data, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, - request_data=request_data, - file_name=file_name, - file_data=file_data, - ) - - return cls(client, response_data["createBulkImportRequest"]) - - @classmethod - def create_from_local_file( - cls, client, project_id: str, name: str, file: Path, validate_file=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a local ndjson file with predictions. 
- - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - file (Path): local ndjson file with predictions - validate_file (bool): a flag indicating if there should be a validation - if `file` is a valid ndjson file - Returns: - BulkImportRequest object - - """ - file_name = _make_file_name(project_id, name) - content_length = file.stat().st_size - request_data = _make_request_data( - project_id, name, content_length, file_name - ) - - with file.open("rb") as f: - if validate_file: - reader = parser.reader(f) - # ensure that the underlying json load call is valid - # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53 - # by iterating through the file so we only store - # each line in memory rather than the entire file - try: - _validate_ndjson(reader, client.get_project(project_id)) - except ValueError: - raise ValueError(f"{file} is not a valid ndjson file") - else: - f.seek(0) - file_data = (file.name, f, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, request_data, file_name, file_data - ) - return cls(client, response_data["createBulkImportRequest"]) - - def delete(self) -> None: - """Deletes the import job and also any annotations created by this import. - - Returns: - None - """ - id_param = "bulk_request_id" - query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) { - deleteBulkImportRequest(where: {id: $%s}) { - id - name - } - }""" % (id_param, id_param) - self.client.execute(query_str, {id_param: self.uid}) - - -def _validate_ndjson( - lines: Iterable[Dict[str, Any]], project: "Project" -) -> None: - """ - Client side validation of an ndjson object. - - Does not guarentee that an upload will succeed for the following reasons: - * We are not checking the data row types which will cause the following errors to slip through - * Missing frame indices will not causes an error for videos - * Uploaded annotations for the wrong data type will pass (Eg. entity on images) - * We are not checking bounds of an asset (Eg. frame index, image height, text location) - - Args: - lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines - project (Project): id of project for which predictions will be imported - - Raises: - MALValidationError: Raise for invalid NDJson - UuidError: Duplicate UUID in upload - """ - feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas( - project.ontology() - ) - uids: Set[str] = set() - for idx, line in enumerate(lines): - try: - annotation = NDAnnotation(**line) - annotation.validate_instance( - feature_schemas_by_id, feature_schemas_by_name - ) - uuid = str(annotation.uuid) - if uuid in uids: - raise lb_exceptions.UuidError( - f"{uuid} already used in this import job, " - "must be unique for the project." - ) - uids.add(uuid) - except (ValidationError, ValueError, TypeError, KeyError) as e: - raise lb_exceptions.MALValidationError( - f"Invalid NDJson on line {idx}" - ) from e - - -# The rest of this file contains objects for MAL validation -def parse_classification(tool): - """ - Parses a classification from an ontology. 
Only radio, checklist, and text are supported for mal - - Args: - tool (dict) - - Returns: - dict - """ - if tool["type"] in ["radio", "checklist"]: - option_schema_ids = [r["featureSchemaId"] for r in tool["options"]] - option_names = [r["value"] for r in tool["options"]] - return { - "tool": tool["type"], - "featureSchemaId": tool["featureSchemaId"], - "name": tool["name"], - "options": [*option_schema_ids, *option_names], - } - elif tool["type"] == "text": - return { - "tool": tool["type"], - "name": tool["name"], - "featureSchemaId": tool["featureSchemaId"], - } - - -def get_mal_schemas(ontology): - """ - Converts a project ontology to a dict for easier lookup during ndjson validation - - Args: - ontology (Ontology) - Returns: - Dict, Dict : Useful for looking up a tool from a given feature schema id or name - """ - - valid_feature_schemas_by_schema_id = {} - valid_feature_schemas_by_name = {} - for tool in ontology.normalized["tools"]: - classifications = [ - parse_classification(classification_tool) - for classification_tool in tool["classifications"] - ] - classifications_by_schema_id = { - v["featureSchemaId"]: v for v in classifications - } - classifications_by_name = {v["name"]: v for v in classifications} - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - valid_feature_schemas_by_name[tool["name"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - for tool in ontology.normalized["classifications"]: - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = ( - parse_classification(tool) - ) - valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool) - return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name - - -class Bbox(BaseModel): - top: float - left: float - height: float - width: float - - -class Point(BaseModel): - x: float - y: float - - -class FrameLocation(BaseModel): - end: int - start: int - - -class VideoSupported(BaseModel): - # Note that frames are only allowed as top level inferences for video - frames: Optional[List[FrameLocation]] = None - - -# Base class for a special kind of union. -class SpecialUnion: - def __new__(cls, **kwargs): - return cls.build(kwargs) - - @classmethod - def __get_validators__(cls): - yield cls.build - - @classmethod - def get_union_types(cls): - if not issubclass(cls, SpecialUnion): - raise TypeError("{} must be a subclass of SpecialUnion") - - union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")] - if len(union_types) < 1: - raise TypeError( - "Class {cls} should inherit from a union of objects to build" - ) - if len(union_types) > 1: - raise TypeError( - f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}" - ) - return union_types[0].__args__[0].__args__ - - @classmethod - def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase": - """ - Checks through all objects in the union to see which matches the input data. 
- Args: - data (Union[dict, BaseModel]) : The data for constructing one of the objects in the union - raises: - KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion - ValidationError: Error while trying to construct a specific object in the union - - """ - if isinstance(data, BaseModel): - data = data.model_dump() - - top_level_fields = [] - max_match = 0 - matched = None - - for type_ in cls.get_union_types(): - determinate_fields = _determinants(type_) - top_level_fields.append(determinate_fields) - matches = sum([val in determinate_fields for val in data]) - if matches == len(determinate_fields) and matches > max_match: - max_match = matches - matched = type_ - - if matched is not None: - # These two have the exact same top level keys - if matched in [NDRadio, NDText]: - if isinstance(data["answer"], dict): - matched = NDRadio - elif isinstance(data["answer"], str): - matched = NDText - else: - raise TypeError( - f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict" - ) - return matched(**data) - else: - raise KeyError( - f"Invalid annotation. Must have one of the following keys : {top_level_fields}. Found {data}." - ) - - @classmethod - def schema(cls): - results = {"definitions": {}} - for cl in cls.get_union_types(): - schema = cl.schema() - results["definitions"].update(schema.pop("definitions")) - results[cl.__name__] = schema - return results - - -class DataRow(BaseModel): - id: str - - -class NDFeatureSchema(BaseModel): - schemaId: Optional[str] = None - name: Optional[str] = None - - @model_validator(mode="after") - def most_set_one(self): - if self.schemaId is None and self.name is None: - raise ValueError( - "Must set either schemaId or name for all feature schemas" - ) - return self - - -class NDBase(NDFeatureSchema): - ontology_type: str - uuid: UUID - dataRow: DataRow - model_config = ConfigDict(extra="forbid") - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - if self.name: - if self.name not in valid_feature_schemas_by_name: - raise ValueError( - f"Name {self.name} is not valid for the provided project's ontology." - ) - - if ( - self.ontology_type - != valid_feature_schemas_by_name[self.name]["tool"] - ): - raise ValueError( - f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}" - ) - - if self.schemaId: - if self.schemaId not in valid_feature_schemas_by_id: - raise ValueError( - f"Schema id {self.schemaId} is not valid for the provided project's ontology." 
- ) - - if ( - self.ontology_type - != valid_feature_schemas_by_id[self.schemaId]["tool"] - ): - raise ValueError( - f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}" - ) - - def validate_instance( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - self.validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - - -###### Classifications ###### - - -class NDText(NDBase): - ontology_type: Literal["text"] = "text" - answer: str = Field(json_schema_extra={"determinant": True}) - # No feature schema to check - - -class NDChecklist(VideoSupported, NDBase): - ontology_type: Literal["checklist"] = "checklist" - answers: List[NDFeatureSchema] = Field( - json_schema_extra={"determinant": True} - ) - - @field_validator("answers", mode="before") - def validate_answers(cls, value, field): - # constr not working with mypy. - if not len(value): - raise ValueError("Checklist answers should not be empty") - return value - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - # Test top level feature schema for this tool - super(NDChecklist, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - # Test the feature schemas provided to the answer field - if len( - set([answer.name or answer.schemaId for answer in self.answers]) - ) != len(self.answers): - raise ValueError( - f"Duplicated featureSchema found for checklist {self.uuid}" - ) - for answer in self.answers: - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if answer.name not in options and answer.schemaId not in options: - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {answer}" - ) - - -class NDRadio(VideoSupported, NDBase): - ontology_type: Literal["radio"] = "radio" - answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True}) - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDRadio, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if ( - self.answer.name not in options - and self.answer.schemaId not in options - ): - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {self.answer.name or self.answer.schemaId}" - ) - - -# A union with custom construction logic to improve error messages -class NDClassification( - SpecialUnion, - Type[Union[NDText, NDRadio, NDChecklist]], # type: ignore -): ... 
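The union above dispatches on the fields marked as determinants rather than on an ordinary pydantic discriminator, which is easy to miss when reading the class bodies alone. A hypothetical sketch of how build() selected a member (the ids are placeholders, and since this patch deletes these classes the snippet only illustrates the selection rule, not a call you can still make):

    # Radio and text lines carry identical top-level keys; build() first keeps the
    # members whose determinant fields ("answer" or "answers") all appear in the
    # payload, then breaks the NDText/NDRadio tie on the runtime type of "answer".
    radio_line = {
        "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
        "schemaId": "ckappz7d700gn0zbocmqkwd9i",
        "dataRow": {"id": "ck1s02fqxm8fi0757f0e6qtdc"},
        "answer": {"schemaId": "<answer-schema-id>"},  # dict -> NDRadio
    }
    text_line = {**radio_line, "answer": "free-form text"}  # str -> NDText

    checklist_line = {k: radio_line[k] for k in ("uuid", "schemaId", "dataRow")}
    checklist_line["answers"] = [{"schemaId": "<option-schema-id>"}]  # -> NDChecklist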
-
-
-###### Tools ######
-
-
-class NDBaseTool(NDBase):
-    classifications: List[NDClassification] = []
-
-    # This is independent of our problem
-    def validate_feature_schemas(
-        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
-    ):
-        super(NDBaseTool, self).validate_feature_schemas(
-            valid_feature_schemas_by_id, valid_feature_schemas_by_name
-        )
-        for classification in self.classifications:
-            classification.validate_feature_schemas(
-                valid_feature_schemas_by_name[self.name][
-                    "classificationsBySchemaId"
-                ]
-                if self.name
-                else valid_feature_schemas_by_id[self.schemaId][
-                    "classificationsBySchemaId"
-                ],
-                valid_feature_schemas_by_name[self.name][
-                    "classificationsByName"
-                ]
-                if self.name
-                else valid_feature_schemas_by_id[self.schemaId][
-                    "classificationsByName"
-                ],
-            )
-
-    @field_validator("classifications", mode="before")
-    def validate_subclasses(cls, value, field):
-        # Create uuid and datarow id so we don't have to define classification objects twice
-        # This is caused by the fact that we require these ids for top level classifications but not for subclasses
-        results = []
-        dummy_id = "child".center(25, "_")
-        for row in value:
-            results.append(
-                {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())}
-            )
-        return results
-
-
-class NDPolygon(NDBaseTool):
-    ontology_type: Literal["polygon"] = "polygon"
-    polygon: List[Point] = Field(json_schema_extra={"determinant": True})
-
-    @field_validator("polygon")
-    def is_geom_valid(cls, v):
-        if len(v) < 3:
-            raise ValueError(
-                f"A polygon must have at least 3 points to be valid. Found {v}"
-            )
-        return v
-
-
-class NDPolyline(NDBaseTool):
-    ontology_type: Literal["line"] = "line"
-    line: List[Point] = Field(json_schema_extra={"determinant": True})
-
-    @field_validator("line")
-    def is_geom_valid(cls, v):
-        if len(v) < 2:
-            raise ValueError(
-                f"A line must have at least 2 points to be valid. Found {v}"
-            )
-        return v
-
-
-class NDRectangle(NDBaseTool):
-    ontology_type: Literal["rectangle"] = "rectangle"
-    bbox: Bbox = Field(json_schema_extra={"determinant": True})
-    # Could check if points are positive
-
-
-class NDPoint(NDBaseTool):
-    ontology_type: Literal["point"] = "point"
-    point: Point = Field(json_schema_extra={"determinant": True})
-    # Could check if points are positive
-
-
-class EntityLocation(BaseModel):
-    start: int
-    end: int
-
-
-class NDTextEntity(NDBaseTool):
-    ontology_type: Literal["named-entity"] = "named-entity"
-    location: EntityLocation = Field(json_schema_extra={"determinant": True})
-
-    @field_validator("location")
-    def is_valid_location(cls, v):
-        if isinstance(v, BaseModel):
-            v = v.model_dump()
-
-        if len(v) < 2:
-            raise ValueError(
-                f"A text location must have both a start and an end. Found {v}"
-            )
-        if v["start"] < 0:
-            raise ValueError(f"Text location must be positive. Found {v}")
-        if v["start"] > v["end"]:
-            raise ValueError(
-                f"Text start location must be less than or equal to the end. Found {v}"
-            )
-        return v
-
-
-class RLEMaskFeatures(BaseModel):
-    counts: List[int]
-    size: List[int]
-
-    @field_validator("counts")
-    def validate_counts(cls, counts):
-        if not all([count >= 0 for count in counts]):
-            raise ValueError(
-                "Found negative value for counts. They should all be zero or positive"
-            )
-        return counts
-
-    @field_validator("size")
-    def validate_size(cls, size):
-        if len(size) != 2:
-            raise ValueError(
-                f"Mask `size` should have two ints representing height and width. Found : {size}"
Found : {size}" - ) - if not all([count > 0 for count in size]): - raise ValueError( - f"Mask `size` should be a postitive int. Found : {size}" - ) - return size - - -class PNGMaskFeatures(BaseModel): - # base64 encoded png bytes - png: str - - -class URIMaskFeatures(BaseModel): - instanceURI: str - colorRGB: Union[List[int], Tuple[int, int, int]] - - @field_validator("colorRGB") - def validate_color(cls, colorRGB): - # Does the dtype matter? Can it be a float? - if not isinstance(colorRGB, (tuple, list)): - raise ValueError( - f"Received color that is not a list or tuple. Found : {colorRGB}" - ) - elif len(colorRGB) != 3: - raise ValueError( - f"Must provide RGB values for segmentation colors. Found : {colorRGB}" - ) - elif not all([0 <= color <= 255 for color in colorRGB]): - raise ValueError( - f"All rgb colors must be between 0 and 255. Found : {colorRGB}" - ) - return colorRGB - - -class NDMask(NDBaseTool): - ontology_type: Literal["superpixel"] = "superpixel" - mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field( - json_schema_extra={"determinant": True} - ) - - -# A union with custom construction logic to improve error messages -class NDTool( - SpecialUnion, - Type[ # type: ignore - Union[ - NDMask, - NDTextEntity, - NDPoint, - NDRectangle, - NDPolyline, - NDPolygon, - ] - ], -): ... - - -class NDAnnotation( - SpecialUnion, - Type[Union[NDTool, NDClassification]], # type: ignore -): - @classmethod - def build(cls: Any, data) -> "NDBase": - if not isinstance(data, dict): - raise ValueError("value must be dict") - errors = [] - for cl in cls.get_union_types(): - try: - return cl(**data) - except KeyError as e: - errors.append(f"{cl.__name__}: {e}") - - raise ValueError( - "Unable to construct any annotation.\n{}".format("\n".join(errors)) - ) - - @classmethod - def schema(cls): - data = {"definitions": {}} - for type_ in cls.get_union_types(): - schema_ = type_.schema() - data["definitions"].update(schema_.pop("definitions")) - data[type_.__name__] = schema_ - return data diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py index 6f8aebc58..dfc87c8a4 100644 --- a/libs/labelbox/src/labelbox/schema/enums.py +++ b/libs/labelbox/src/labelbox/schema/enums.py @@ -1,31 +1,6 @@ from enum import Enum -class BulkImportRequestState(Enum): - """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). - - If you are not usinig MEA continue using BulkImportRequest. - AnnotationImports are in beta and will change soon. - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - State - - Description - * - RUNNING - - Indicates that the import job is not done yet. - * - FAILED - - Indicates the import job failed. Check `BulkImportRequest.errors` for more information - * - FINISHED - - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information - """ - - RUNNING = "RUNNING" - FAILED = "FAILED" - FINISHED = "FINISHED" - - class AnnotationImportState(Enum): """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). 
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f8876f7c4..88153e48f 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -5,36 +5,29 @@ import warnings from collections import namedtuple from datetime import datetime, timezone -from pathlib import Path from typing import ( TYPE_CHECKING, Any, Dict, - Iterable, List, Optional, Tuple, - TypeVar, Union, overload, ) -from urllib.parse import urlparse from labelbox.schema.labeling_service import ( LabelingService, LabelingServiceStatus, ) from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -import requests -from labelbox import parser from labelbox import utils from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, - ResourceConflict, ResourceNotFoundError, ) from labelbox.orm import query @@ -46,7 +39,6 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, - validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -63,7 +55,6 @@ from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, - OntologyKind, UploadType, ) from labelbox.schema.project_overview import ( @@ -72,7 +63,7 @@ ) if TYPE_CHECKING: - from labelbox import BulkImportRequest + pass DataRowPriority = int @@ -579,7 +570,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - f"This function has only been tested to work with the Editor front end. Found %s", + "This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -814,7 +805,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - f"Batch exceeds max size, break into smaller batches" + "Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1088,7 +1079,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Batch was not created successfully: " + "Batch was not created successfully: " + json.dumps(task.errors) ) @@ -1436,7 +1427,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Priority was not updated successfully: " + "Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1488,33 +1479,6 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] - def bulk_import_requests(self) -> PaginatedCollection: - """Returns bulk import request objects which are used in model-assisted labeling. - These are returned with the oldest first, and most recent last. - """ - - id_param = "project_id" - query_str = """query ListAllImportRequestsPyApi($%s: ID!) 
{ - bulkImportRequests ( - where: { projectId: $%s } - skip: %%d - first: %%d - ) { - %s - } - }""" % ( - id_param, - id_param, - query.results_query_part(Entity.BulkImportRequest), - ) - return PaginatedCollection( - self.client, - query_str, - {id_param: str(self.uid)}, - ["bulkImportRequests"], - Entity.BulkImportRequest, - ) - def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1629,7 +1593,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Data rows were not moved successfully: " + "Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1639,77 +1603,6 @@ def _wait_for_task(self, task_id: str) -> Task: return task - def upload_annotations( - self, - name: str, - annotations: Union[str, Path, Iterable[Dict]], - validate: bool = False, - ) -> "BulkImportRequest": # type: ignore - """Uploads annotations to a new Editor project. - - Args: - name (str): name of the BulkImportRequest job - annotations (str or Path or Iterable): - url that is publicly accessible by Labelbox containing an - ndjson file - OR local path to an ndjson file - OR iterable of annotation rows - validate (bool): - Whether or not to validate the payload before uploading. - Returns: - BulkImportRequest - """ - - if isinstance(annotations, str) or isinstance(annotations, Path): - - def _is_url_valid(url: Union[str, Path]) -> bool: - """Verifies that the given string is a valid url. - - Args: - url: string to be checked - Returns: - True if the given url is valid otherwise False - - """ - if isinstance(url, Path): - return False - parsed = urlparse(url) - return bool(parsed.scheme) and bool(parsed.netloc) - - if _is_url_valid(annotations): - return Entity.BulkImportRequest.create_from_url( - client=self.client, - project_id=self.uid, - name=name, - url=str(annotations), - validate=validate, - ) - else: - path = Path(annotations) - if not path.exists(): - raise FileNotFoundError( - f"{annotations} is not a valid url nor existing local file" - ) - return Entity.BulkImportRequest.create_from_local_file( - client=self.client, - project_id=self.uid, - name=name, - file=path, - validate_file=validate, - ) - elif isinstance(annotations, Iterable): - return Entity.BulkImportRequest.create_from_objects( - client=self.client, - project_id=self.uid, - name=name, - predictions=annotations, # type: ignore - validate=validate, - ) - else: - raise ValueError( - f"Invalid annotations given of type: {type(annotations)}" - ) - def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py deleted file mode 100644 index 9abae1422..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ /dev/null @@ -1,258 +0,0 @@ -from unittest.mock import patch -import uuid -from labelbox import parser, Project -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -import pytest -import random -from labelbox.data.annotation_types.annotation import ObjectAnnotation -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - ClassificationAnnotation, - ClassificationAnswer, - Radio, -) -from labelbox.data.annotation_types.data.video import VideoData -from 
labelbox.data.annotation_types.geometry.point import Point -from labelbox.data.annotation_types.geometry.rectangle import ( - Rectangle, - RectangleUnit, -) -from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.data.text import TextData -from labelbox.data.annotation_types.ner import ( - DocumentEntity, - DocumentTextSelection, -) -from labelbox.data.annotation_types.video import VideoObjectAnnotation - -from labelbox.data.serialization import NDJsonConverter -from labelbox.exceptions import MALValidationError, UuidError -from labelbox.schema.bulk_import_request import BulkImportRequest -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.annotation_import import LabelImport, MALPredictionImport -from labelbox.schema.media_type import MediaType - -""" -- Here we only want to check that the uploads are calling the validation -- Then with unit tests we can check the types of errors raised -""" -# TODO: remove library once bulk import requests are removed - - -@pytest.mark.order(1) -def test_create_from_url(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_file(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - with pytest.raises(MALValidationError): - module_project.upload_annotations( - name=name, annotations=url, validate=True - ) - # Schema ids shouldn't match - - -def test_create_from_objects( - module_project: Project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_create_from_label_objects( - module_project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - labels = list(NDJsonConverter.deserialize(predictions)) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=labels - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - normalized_predictions = list(NDJsonConverter.serialize(labels)) - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, normalized_predictions - ) - - -def test_create_from_local_file( - tmp_path, predictions, module_project, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - file_name = f"{name}.ndjson" - file_path = tmp_path / 
file_name - with file_path.open("w") as f: - parser.dump(predictions, f) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=str(file_path), validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_get(client, module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - bulk_import_request = BulkImportRequest.from_name( - client, project_id=module_project.uid, name=name - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_ndjson(tmp_path, module_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - -def test_validate_ndjson_uuid(tmp_path, module_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - uid = str(uuid.uuid4()) - repeat_uuid[0]["uuid"] = uid - repeat_uuid[1]["uuid"] = uid - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - -@pytest.mark.skip( - "Slow test and uses a deprecated api endpoint for annotation imports" -) -def test_wait_till_done(rectangle_inference, project): - name = str(uuid.uuid4()) - url = project.client.upload_data( - content=parser.dumps(rectangle_inference), sign=True - ) - bulk_import_request = project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert len(bulk_import_request.inputs) == 1 - bulk_import_request.wait_until_done() - assert bulk_import_request.state == BulkImportRequestState.FINISHED - - # Check that the status files are being returned as expected - assert len(bulk_import_request.errors) == 0 - assert len(bulk_import_request.inputs) == 1 - assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] - assert len(bulk_import_request.statuses) == 1 - assert bulk_import_request.statuses[0]["status"] == "SUCCESS" - assert ( - bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] - ) - - -def test_project_bulk_import_requests(module_project, predictions): - result = module_project.bulk_import_requests() - assert len(list(result)) == 0 - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - 
name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - result = module_project.bulk_import_requests() - assert len(list(result)) == 3 - - -def test_delete(module_project, predictions): - name = str(uuid.uuid4()) - - bulk_import_requests = module_project.bulk_import_requests() - [ - bulk_import_request.delete() - for bulk_import_request in bulk_import_requests - ] - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 1 - - bulk_import_request.delete() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index a0df559fc..0ec742333 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,8 +1,6 @@ from labelbox.schema.media_type import MediaType -from labelbox.schema.project import Project import pytest -from labelbox import parser from pytest_cases import parametrize, fixture_ref from labelbox.exceptions import MALValidationError @@ -12,7 +10,6 @@ NDMask, NDPolygon, NDPolyline, - NDRadio, NDRectangle, NDText, NDTextEntity, @@ -191,39 +188,6 @@ def test_missing_feature_schema(module_project, rectangle_inference): _validate_ndjson([pred], module_project) -def test_validate_ndjson(tmp_path, configured_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - configured_project.upload_annotations( - name="name", annotations=str(file_path), validate=True - ) - - -def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - repeat_uuid[0]["uuid"] = "test_uuid" - repeat_uuid[1]["uuid"] = "test_uuid" - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - @pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) def test_video_upload(video_checklist_inference, configured_project): pred = video_checklist_inference[0].copy() From 5e87f4e767e847475c7bd7546f25dee68e42609c Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 Sep 2024 20:41:28 -0500 Subject: [PATCH 10/35] [PLT-1463] Remove deserialize completely (#1818) --- .github/workflows/lbox-develop.yml | 4 +- .github/workflows/python-package-develop.yml | 4 +- .../data/serialization/ndjson/base.py | 12 -- .../serialization/ndjson/classification.py | 13 +- .../data/serialization/ndjson/converter.py | 14 -- .../data/serialization/ndjson/label.py | 64 +----- .../data/serialization/ndjson/metric.py | 5 +- .../labelbox/data/serialization/ndjson/mmc.py | 4 +- 
.../data/serialization/ndjson/objects.py | 49 ++--- .../data/serialization/ndjson/relationship.py | 4 +- libs/labelbox/src/labelbox/schema/__init__.py | 1 - libs/labelbox/src/labelbox/schema/project.py | 3 +- .../data/annotation_import/test_data_types.py | 83 -------- .../test_generic_data_types.py | 72 ------- .../test_mea_prediction_import.py | 70 ++++++- .../test_ndjson_validation.py | 194 ------------------ .../ndjson/test_generic_data_row_data.py | 79 +++++++ 17 files changed, 177 insertions(+), 498 deletions(-) delete mode 100644 libs/labelbox/tests/data/annotation_import/test_data_types.py delete mode 100644 libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py create mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py diff --git a/.github/workflows/lbox-develop.yml b/.github/workflows/lbox-develop.yml index ba1e4f34e..efb642f66 100644 --- a/.github/workflows/lbox-develop.yml +++ b/.github/workflows/lbox-develop.yml @@ -2,9 +2,9 @@ name: LBox Develop on: push: - branches: [develop] + branches: [develop, v6] pull_request: - branches: [develop] + branches: [develop, v6] concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/.github/workflows/python-package-develop.yml b/.github/workflows/python-package-develop.yml index 05eff5dc4..769d04c74 100644 --- a/.github/workflows/python-package-develop.yml +++ b/.github/workflows/python-package-develop.yml @@ -2,9 +2,9 @@ name: Labelbox Python SDK Staging (Develop) on: push: - branches: [develop] + branches: [develop, v6] pull_request: - branches: [develop] + branches: [develop, v6] concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py index 75ebdc100..d8d8cd36f 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py @@ -8,18 +8,6 @@ from ....annotated_types import Cuid -subclass_registry = {} - - -class _SubclassRegistryBase(BaseModel): - model_config = ConfigDict(extra="allow") - - def __init_subclass__(cls, **kwargs): - super().__init_subclass__(**kwargs) - if cls.__name__ != "NDAnnotation": - with threading.Lock(): - subclass_registry[cls.__name__] = cls - class DataRow(_CamelCaseMixin): id: Optional[str] = None diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py index b127c4a90..2c3215265 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py @@ -30,7 +30,6 @@ model_serializer, ) from pydantic.alias_generators import to_camel -from .base import _SubclassRegistryBase class NDAnswer(ConfidenceMixin, CustomMetricsMixin): @@ -224,7 +223,7 @@ def from_common( # ====== End of subclasses -class NDText(NDAnnotation, NDTextSubclass, _SubclassRegistryBase): +class NDText(NDAnnotation, NDTextSubclass): @classmethod def from_common( cls, @@ -249,9 +248,7 @@ def from_common( ) -class NDChecklist( - NDAnnotation, NDChecklistSubclass, VideoSupported, _SubclassRegistryBase -): +class NDChecklist(NDAnnotation, NDChecklistSubclass, VideoSupported): @model_serializer(mode="wrap") def serialize_model(self, handler): res = handler(self) @@ -298,9 +295,7 @@ def from_common( ) -class NDRadio( - NDAnnotation, NDRadioSubclass, VideoSupported, 
_SubclassRegistryBase -): +class NDRadio(NDAnnotation, NDRadioSubclass, VideoSupported): @classmethod def from_common( cls, @@ -343,7 +338,7 @@ def serialize_model(self, handler): return res -class NDPromptText(NDAnnotation, NDPromptTextSubclass, _SubclassRegistryBase): +class NDPromptText(NDAnnotation, NDPromptTextSubclass): @classmethod def from_common( cls, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py index 01ab8454a..8176d7862 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py @@ -26,20 +26,6 @@ class NDJsonConverter: - @staticmethod - def deserialize(json_data: Iterable[Dict[str, Any]]) -> LabelGenerator: - """ - Converts ndjson data (prediction import format) into the common labelbox format. - - Args: - json_data: An iterable representing the ndjson data - Returns: - LabelGenerator containing the ndjson data. - """ - data = NDLabel(**{"annotations": copy.copy(json_data)}) - res = data.to_common() - return res - @staticmethod def serialize( labels: LabelCollection, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 18134a228..7039ae834 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -46,7 +46,6 @@ from .relationship import NDRelationship from .base import DataRow from pydantic import BaseModel, ValidationError -from .base import subclass_registry, _SubclassRegistryBase from pydantic_core import PydanticUndefined from contextlib import suppress @@ -67,68 +66,7 @@ class NDLabel(BaseModel): - annotations: List[_SubclassRegistryBase] - - def __init__(self, **kwargs): - # NOTE: Deserialization of subclasses in pydantic is difficult, see here https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83 - # Below implements the subclass registry as mentioned in the article. The python dicts we pass in can be missing certain fields - # we essentially have to infer the type against all sub classes that have the _SubclasssRegistryBase inheritance. - # It works by checking if the keys of our annotations we are missing in matches any required subclass. - # More keys are prioritized over less keys (closer match). This is used when importing json to our base models not a lot of customer workflows - # depend on this method but this works for all our existing tests with the bonus of added validation. (no subclass found it throws an error) - - for index, annotation in enumerate(kwargs["annotations"]): - if isinstance(annotation, dict): - item_annotation_keys = annotation.keys() - key_subclass_combos = defaultdict(list) - for subclass in subclass_registry.values(): - # Get all required keys from subclass - annotation_keys = [] - for k, field in subclass.model_fields.items(): - if field.default == PydanticUndefined and k != "uuid": - if ( - hasattr(field, "alias") - and field.alias in item_annotation_keys - ): - annotation_keys.append(field.alias) - elif ( - hasattr(field, "validation_alias") - and field.validation_alias - in item_annotation_keys - ): - annotation_keys.append(field.validation_alias) - else: - annotation_keys.append(k) - - key_subclass_combos[subclass].extend(annotation_keys) - - # Sort by subclass that has the most keys i.e. 
the one with the most keys that matches is most likely our subclass - key_subclass_combos = dict( - sorted( - key_subclass_combos.items(), - key=lambda x: len(x[1]), - reverse=True, - ) - ) - - for subclass, key_subclass_combo in key_subclass_combos.items(): - # Choose the keys from our dict we supplied that matches the required keys of a subclass - check_required_keys = all( - key in list(item_annotation_keys) - for key in key_subclass_combo - ) - if check_required_keys: - # Keep trying subclasses until we find one that has valid values (does not throw an validation error) - with suppress(ValidationError): - annotation = subclass(**annotation) - break - if isinstance(annotation, dict): - raise ValueError( - f"Could not find subclass for fields: {item_annotation_keys}" - ) - - kwargs["annotations"][index] = annotation - super().__init__(**kwargs) + annotations: AnnotationType class _Relationship(BaseModel): """This object holds information about the relationship""" diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py index 60d538b19..b28e575cf 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py @@ -15,7 +15,6 @@ ConfusionMatrixMetricConfidenceValue, ) from pydantic import ConfigDict, model_serializer -from .base import _SubclassRegistryBase class BaseNDMetric(NDJsonBase): @@ -33,7 +32,7 @@ def serialize_model(self, handler): return res -class NDConfusionMatrixMetric(BaseNDMetric, _SubclassRegistryBase): +class NDConfusionMatrixMetric(BaseNDMetric): metric_value: Union[ ConfusionMatrixMetricValue, ConfusionMatrixMetricConfidenceValue ] @@ -65,7 +64,7 @@ def from_common( ) -class NDScalarMetric(BaseNDMetric, _SubclassRegistryBase): +class NDScalarMetric(BaseNDMetric): metric_value: Union[ScalarMetricValue, ScalarMetricConfidenceValue] metric_name: Optional[str] = None aggregation: Optional[ScalarMetricAggregation] = ( diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py index 4be24f683..74d185f45 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py @@ -2,7 +2,7 @@ from labelbox.utils import _CamelCaseMixin -from .base import _SubclassRegistryBase, DataRow, NDAnnotation +from .base import DataRow, NDAnnotation from ...annotation_types.mmc import ( MessageSingleSelectionTask, MessageMultiSelectionTask, @@ -20,7 +20,7 @@ class MessageTaskData(_CamelCaseMixin): ] -class NDMessageTask(NDAnnotation, _SubclassRegistryBase): +class NDMessageTask(NDAnnotation): message_evaluation_task: MessageTaskData def to_common(self) -> MessageEvaluationTaskAnnotation: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py index a1465fa06..91abface6 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py @@ -52,7 +52,7 @@ NDSubclassification, NDSubclassificationType, ) -from .base import DataRow, NDAnnotation, NDJsonBase, _SubclassRegistryBase +from .base import DataRow, NDAnnotation, NDJsonBase from pydantic import BaseModel @@ -81,9 +81,7 @@ class Bbox(BaseModel): width: float -class NDPoint( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class 
NDPoint(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): point: _Point def to_common(self) -> Point: @@ -114,7 +112,7 @@ def from_common( ) -class NDFramePoint(VideoSupported, _SubclassRegistryBase): +class NDFramePoint(VideoSupported): point: _Point classifications: List[NDSubclassificationType] = [] @@ -148,9 +146,7 @@ def from_common( ) -class NDLine( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDLine(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): line: List[_Point] def to_common(self) -> Line: @@ -181,7 +177,7 @@ def from_common( ) -class NDFrameLine(VideoSupported, _SubclassRegistryBase): +class NDFrameLine(VideoSupported): line: List[_Point] classifications: List[NDSubclassificationType] = [] @@ -215,7 +211,7 @@ def from_common( ) -class NDDicomLine(NDFrameLine, _SubclassRegistryBase): +class NDDicomLine(NDFrameLine): def to_common( self, name: str, @@ -234,9 +230,7 @@ def to_common( ) -class NDPolygon( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDPolygon(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): polygon: List[_Point] def to_common(self) -> Polygon: @@ -267,9 +261,7 @@ def from_common( ) -class NDRectangle( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDRectangle(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): bbox: Bbox def to_common(self) -> Rectangle: @@ -313,7 +305,7 @@ def from_common( ) -class NDDocumentRectangle(NDRectangle, _SubclassRegistryBase): +class NDDocumentRectangle(NDRectangle): page: int unit: str @@ -360,7 +352,7 @@ def from_common( ) -class NDFrameRectangle(VideoSupported, _SubclassRegistryBase): +class NDFrameRectangle(VideoSupported): bbox: Bbox classifications: List[NDSubclassificationType] = [] @@ -496,7 +488,7 @@ def to_common( ] -class NDSegments(NDBaseObject, _SubclassRegistryBase): +class NDSegments(NDBaseObject): segments: List[NDSegment] def to_common(self, name: str, feature_schema_id: Cuid): @@ -532,7 +524,7 @@ def from_common( ) -class NDDicomSegments(NDBaseObject, DicomSupported, _SubclassRegistryBase): +class NDDicomSegments(NDBaseObject, DicomSupported): segments: List[NDDicomSegment] def to_common(self, name: str, feature_schema_id: Cuid): @@ -580,9 +572,7 @@ class _PNGMask(BaseModel): png: str -class NDMask( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDMask(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): mask: Union[_URIMask, _PNGMask] def to_common(self) -> Mask: @@ -646,7 +636,6 @@ class NDVideoMasks( NDJsonBase, ConfidenceMixin, CustomMetricsNotSupportedMixin, - _SubclassRegistryBase, ): masks: NDVideoMasksFramesInstances @@ -678,7 +667,7 @@ def from_common(cls, annotation, data): ) -class NDDicomMasks(NDVideoMasks, DicomSupported, _SubclassRegistryBase): +class NDDicomMasks(NDVideoMasks, DicomSupported): def to_common(self) -> DICOMMaskAnnotation: return DICOMMaskAnnotation( frames=self.masks.frames, @@ -702,9 +691,7 @@ class Location(BaseModel): end: int -class NDTextEntity( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDTextEntity(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): location: Location def to_common(self) -> TextEntity: @@ -738,9 +725,7 @@ def from_common( ) -class NDDocumentEntity( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDDocumentEntity(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): name: str text_selections: List[DocumentTextSelection] @@ 
-774,7 +759,7 @@ def from_common( ) -class NDConversationEntity(NDTextEntity, _SubclassRegistryBase): +class NDConversationEntity(NDTextEntity): message_id: str def to_common(self) -> ConversationEntity: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py index fbea7e477..94c8e9879 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py @@ -5,7 +5,7 @@ from ...annotation_types.relationship import RelationshipAnnotation from ...annotation_types.relationship import Relationship from .objects import NDObjectType -from .base import DataRow, _SubclassRegistryBase +from .base import DataRow SUPPORTED_ANNOTATIONS = NDObjectType @@ -16,7 +16,7 @@ class _Relationship(BaseModel): type: str -class NDRelationship(NDAnnotation, _SubclassRegistryBase): +class NDRelationship(NDAnnotation): relationship: _Relationship @staticmethod diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index 03327e0d1..d6b74de68 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,5 +1,4 @@ import labelbox.schema.asset_attachment -import labelbox.schema.bulk_import_request import labelbox.schema.annotation_import import labelbox.schema.benchmark import labelbox.schema.data_row diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 88153e48f..f2de4db5e 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -1079,8 +1079,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Batch was not created successfully: " - + json.dumps(task.errors) + "Batch was not created successfully: " + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) diff --git a/libs/labelbox/tests/data/annotation_import/test_data_types.py b/libs/labelbox/tests/data/annotation_import/test_data_types.py deleted file mode 100644 index 1e45295ef..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_data_types.py +++ /dev/null @@ -1,83 +0,0 @@ -import pytest - -from labelbox.data.annotation_types.data import ( - AudioData, - ConversationData, - DocumentData, - HTMLData, - ImageData, - TextData, -) -from labelbox.data.serialization import NDJsonConverter -from labelbox.data.annotation_types.data.video import VideoData - -import labelbox.types as lb_types -from labelbox.schema.media_type import MediaType - -# Unit test for label based on data type. -# TODO: Dicom removed it is unstable when you deserialize and serialize on label import. If we intend to keep this library this needs add generic data types tests work with this data type. 
-# TODO: add MediaType.LLMPromptResponseCreation(data gen) once supported and llm human preference once media type is added - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, AudioData), - (MediaType.Html, HTMLData), - (MediaType.Image, ImageData), - (MediaType.Text, TextData), - (MediaType.Video, VideoData), - (MediaType.Conversational, ConversationData), - (MediaType.Document, DocumentData), - ], -) -def test_data_row_type_by_data_row_id( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_datarow_id, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = lb_types.Label( - data=data_type_class(uid=hardcoded_datarow_id()), - annotations=label.annotations, - ) - - assert data_label.data.uid == label.data.uid - assert label.annotations == data_label.annotations - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, AudioData), - (MediaType.Html, HTMLData), - (MediaType.Image, ImageData), - (MediaType.Text, TextData), - (MediaType.Video, VideoData), - (MediaType.Conversational, ConversationData), - (MediaType.Document, DocumentData), - ], -) -def test_data_row_type_by_global_key( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_global_key, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = lb_types.Label( - data=data_type_class(global_key=hardcoded_global_key()), - annotations=label.annotations, - ) - - assert data_label.data.global_key == label.data.global_key - assert label.annotations == data_label.annotations diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py index 9de67bd4e..3fc6cddf6 100644 --- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py +++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py @@ -28,78 +28,6 @@ def validate_iso_format(date_string: str): assert parsed_t.second is not None -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, GenericDataRowData), - (MediaType.Html, GenericDataRowData), - (MediaType.Image, GenericDataRowData), - (MediaType.Text, GenericDataRowData), - (MediaType.Video, GenericDataRowData), - (MediaType.Conversational, GenericDataRowData), - (MediaType.Document, GenericDataRowData), - (MediaType.LLMPromptResponseCreation, GenericDataRowData), - (MediaType.LLMPromptCreation, GenericDataRowData), - (OntologyKind.ResponseCreation, GenericDataRowData), - (OntologyKind.ModelEvaluation, GenericDataRowData), - ], -) -def test_generic_data_row_type_by_data_row_id( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_datarow_id, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = Label( - data=data_type_class(uid=hardcoded_datarow_id()), - annotations=label.annotations, - ) - - assert data_label.data.uid == label.data.uid - assert label.annotations == data_label.annotations - - -@pytest.mark.parametrize( - "media_type, data_type_class", - 
[ - (MediaType.Audio, GenericDataRowData), - (MediaType.Html, GenericDataRowData), - (MediaType.Image, GenericDataRowData), - (MediaType.Text, GenericDataRowData), - (MediaType.Video, GenericDataRowData), - (MediaType.Conversational, GenericDataRowData), - (MediaType.Document, GenericDataRowData), - # (MediaType.LLMPromptResponseCreation, GenericDataRowData), - # (MediaType.LLMPromptCreation, GenericDataRowData), - (OntologyKind.ResponseCreation, GenericDataRowData), - (OntologyKind.ModelEvaluation, GenericDataRowData), - ], -) -def test_generic_data_row_type_by_global_key( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_global_key, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = Label( - data=data_type_class(global_key=hardcoded_global_key()), - annotations=label.annotations, - ) - - assert data_label.data.global_key == label.data.global_key - assert label.annotations == data_label.annotations - - @pytest.mark.parametrize( "configured_project, media_type", [ diff --git a/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py b/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py index fccca2a3f..5f47975ad 100644 --- a/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py +++ b/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py @@ -1,5 +1,19 @@ import uuid from labelbox import parser +from labelbox.data.annotation_types.annotation import ObjectAnnotation +from labelbox.data.annotation_types.classification.classification import ( + ClassificationAnnotation, + ClassificationAnswer, + Radio, +) +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.annotation_types.geometry.line import Line +from labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.polygon import Polygon +from labelbox.data.annotation_types.geometry.rectangle import Rectangle +from labelbox.data.annotation_types.label import Label import pytest from labelbox import ModelRun @@ -193,14 +207,60 @@ def test_create_from_label_objects( annotation_import_test_helpers, ): name = str(uuid.uuid4()) - use_data_row_ids = [ + use_data_row_id = [ p["dataRow"]["id"] for p in object_predictions_for_annotation_import ] - model_run_with_data_rows.upsert_data_rows(use_data_row_ids) - predictions = list( - NDJsonConverter.deserialize(object_predictions_for_annotation_import) - ) + model_run_with_data_rows.upsert_data_rows(use_data_row_id) + + predictions = [] + for data_row_id in use_data_row_id: + predictions.append( + Label( + data=GenericDataRowData( + uid=data_row_id, + ), + annotations=[ + ObjectAnnotation( + name="polygon", + extra={ + "uuid": "6d10fa30-3ea0-4e6c-bbb1-63f5c29fe3e4", + }, + value=Polygon( + points=[ + Point(x=147.692, y=118.154), + Point(x=142.769, y=104.923), + Point(x=57.846, y=118.769), + Point(x=28.308, y=169.846), + Point(x=147.692, y=118.154), + ], + ), + ), + ObjectAnnotation( + name="bbox", + extra={ + "uuid": "15b7138f-4bbc-42c5-ae79-45d87b0a3b2a", + }, + value=Rectangle( + start=Point(x=58.0, y=48.0), + end=Point(x=70.0, y=113.0), + ), + ), + ObjectAnnotation( + name="polyline", + extra={ + "uuid": "cf4c6df9-c39c-4fbc-9541-470f6622978a", + }, + value=Line( + points=[ + Point(x=147.692, y=118.154), + Point(x=150.692, 
y=160.154), + ], + ), + ), + ], + ), + ) annotation_import = model_run_with_data_rows.add_predictions( name=name, predictions=predictions diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py deleted file mode 100644 index 0ec742333..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ /dev/null @@ -1,194 +0,0 @@ -from labelbox.schema.media_type import MediaType -import pytest - -from pytest_cases import parametrize, fixture_ref - -from labelbox.exceptions import MALValidationError -from labelbox.schema.bulk_import_request import ( - NDChecklist, - NDClassification, - NDMask, - NDPolygon, - NDPolyline, - NDRectangle, - NDText, - NDTextEntity, - NDTool, - _validate_ndjson, -) - -""" -- These NDlabels are apart of bulkImportReqeust and should be removed once bulk import request is removed -""" - - -def test_classification_construction(checklist_inference, text_inference): - checklist = NDClassification.build(checklist_inference[0]) - assert isinstance(checklist, NDChecklist) - text = NDClassification.build(text_inference[0]) - assert isinstance(text, NDText) - - -@parametrize( - "inference, expected_type", - [ - (fixture_ref("polygon_inference"), NDPolygon), - (fixture_ref("rectangle_inference"), NDRectangle), - (fixture_ref("line_inference"), NDPolyline), - (fixture_ref("entity_inference"), NDTextEntity), - (fixture_ref("segmentation_inference"), NDMask), - (fixture_ref("segmentation_inference_rle"), NDMask), - (fixture_ref("segmentation_inference_png"), NDMask), - ], -) -def test_tool_construction(inference, expected_type): - assert isinstance(NDTool.build(inference[0]), expected_type) - - -def no_tool(text_inference, module_project): - pred = text_inference[0].copy() - # Missing key - del pred["answer"] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -@pytest.mark.parametrize("configured_project", [MediaType.Text], indirect=True) -def test_invalid_text(text_inference, configured_project): - # and if it is not a string - pred = text_inference[0].copy() - # Extra and wrong key - del pred["answer"] - pred["answers"] = [] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) - del pred["answers"] - - # Invalid type - pred["answer"] = [] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) - - # Invalid type - pred["answer"] = None - with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) - - -def test_invalid_checklist_item(checklist_inference, module_project): - # Only two points - pred = checklist_inference[0].copy() - pred["answers"] = [pred["answers"][0], pred["answers"][0]] - # Duplicate schema ids - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - pred["answers"] = [{"name": "asdfg"}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - pred["answers"] = [{"schemaId": "1232132132"}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - pred["answers"] = [{}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - pred["answers"] = [] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - del pred["answers"] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -def test_invalid_polygon(polygon_inference, 
module_project): - # Only two points - pred = polygon_inference[0].copy() - pred["polygon"] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -@pytest.mark.parametrize("configured_project", [MediaType.Text], indirect=True) -def test_incorrect_entity(entity_inference, configured_project): - entity = entity_inference[0].copy() - # Location cannot be a list - entity["location"] = [0, 10] - with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) - - entity["location"] = {"start": -1, "end": 5} - with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) - - entity["location"] = {"start": 15, "end": 5} - with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) - - -@pytest.mark.skip( - "Test wont work/fails randomly since projects have to have a media type and could be missing features from prediction list" -) -def test_all_validate_json(module_project, predictions): - # Predictions contains one of each type of prediction. - # These should be properly formatted and pass. - _validate_ndjson(predictions[0], module_project) - - -def test_incorrect_line(line_inference, module_project): - line = line_inference[0].copy() - line["line"] = [line["line"][0]] # Just one point - with pytest.raises(MALValidationError): - _validate_ndjson([line], module_project) - - -def test_incorrect_rectangle(rectangle_inference, module_project): - del rectangle_inference[0]["bbox"]["top"] - with pytest.raises(MALValidationError): - _validate_ndjson([rectangle_inference], module_project) - - -def test_duplicate_tools(rectangle_inference, module_project): - pred = rectangle_inference[0].copy() - pred["polygon"] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -def test_invalid_feature_schema(module_project, rectangle_inference): - pred = rectangle_inference[0].copy() - pred["schemaId"] = "blahblah" - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -def test_name_only_feature_schema(module_project, rectangle_inference): - pred = rectangle_inference[0].copy() - _validate_ndjson([pred], module_project) - - -def test_schema_id_only_feature_schema(module_project, rectangle_inference): - pred = rectangle_inference[0].copy() - del pred["name"] - ontology = module_project.ontology().normalized["tools"] - for tool in ontology: - if tool["name"] == "bbox": - feature_schema_id = tool["featureSchemaId"] - pred["schemaId"] = feature_schema_id - _validate_ndjson([pred], module_project) - - -def test_missing_feature_schema(module_project, rectangle_inference): - pred = rectangle_inference[0].copy() - del pred["name"] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -@pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) -def test_video_upload(video_checklist_inference, configured_project): - pred = video_checklist_inference[0].copy() - _validate_ndjson([pred], configured_project) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py b/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py new file mode 100644 index 000000000..0dc4c21c0 --- /dev/null +++ b/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py @@ -0,0 +1,79 @@ +from labelbox.data.annotation_types.data.generic_data_row_data import 
( + GenericDataRowData, +) +from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ClassificationAnnotation, Text + + +def test_generic_data_row_global_key(): + label_1 = Label( + data=GenericDataRowData(global_key="test"), + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + label_2 = Label( + data={"global_key": "test"}, + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + + expected_result = [ + { + "answer": "sample text", + "dataRow": {"globalKey": "test"}, + "name": "free_text", + "uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0", + } + ] + assert ( + list(NDJsonConverter.serialize([label_1])) + == list(NDJsonConverter.serialize([label_2])) + == expected_result + ) + + +def test_generic_data_row_id(): + label_1 = Label( + data=GenericDataRowData(uid="test"), + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + label_2 = Label( + data={"uid": "test"}, + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + + expected_result = [ + { + "answer": "sample text", + "dataRow": {"id": "test"}, + "name": "free_text", + "uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0", + } + ] + assert ( + list(NDJsonConverter.serialize([label_1])) + == list(NDJsonConverter.serialize([label_2])) + == expected_result + ) From 5fc6ff3be56536b5e672a96071930916e047816c Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 Sep 2024 21:51:13 -0500 Subject: [PATCH 11/35] [PLT-1488] Removed coco (#1820) --- .../labelbox/data/serialization/__init__.py | 1 - .../data/serialization/coco/__init__.py | 1 - .../data/serialization/coco/annotation.py | 78 ----- .../data/serialization/coco/categories.py | 17 -- .../data/serialization/coco/converter.py | 170 ----------- .../labelbox/data/serialization/coco/image.py | 52 ---- .../serialization/coco/instance_dataset.py | 266 ------------------ .../serialization/coco/panoptic_dataset.py | 242 ---------------- .../labelbox/data/serialization/coco/path.py | 9 - .../data/serialization/coco/test_coco.py | 38 --- 10 files changed, 874 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/__init__.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/annotation.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/categories.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/converter.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/image.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/path.py delete mode 100644 libs/labelbox/tests/data/serialization/coco/test_coco.py diff --git a/libs/labelbox/src/labelbox/data/serialization/__init__.py b/libs/labelbox/src/labelbox/data/serialization/__init__.py index 71a9b3443..38cb5edff 100644 --- a/libs/labelbox/src/labelbox/data/serialization/__init__.py +++ 
b/libs/labelbox/src/labelbox/data/serialization/__init__.py @@ -1,2 +1 @@ from .ndjson import NDJsonConverter -from .coco import COCOConverter diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py b/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py deleted file mode 100644 index 4511e89ee..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .converter import COCOConverter diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py b/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py deleted file mode 100644 index e387cb7d9..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py +++ /dev/null @@ -1,78 +0,0 @@ -from typing import Any, Tuple, List, Union -from pathlib import Path -from collections import defaultdict -import warnings - -from ...annotation_types.relationship import RelationshipAnnotation -from ...annotation_types.metrics.confusion_matrix import ConfusionMatrixMetric -from ...annotation_types.metrics.scalar import ScalarMetric -from ...annotation_types.video import VideoMaskAnnotation -from ...annotation_types.annotation import ObjectAnnotation -from ...annotation_types.classification.classification import ( - ClassificationAnnotation, -) - -import numpy as np - -from .path import PathSerializerMixin -from pydantic import BaseModel - - -def rle_decoding(rle_arr: List[int], w: int, h: int) -> np.ndarray: - indices = [] - for idx, cnt in zip(rle_arr[0::2], rle_arr[1::2]): - indices.extend( - list(range(idx - 1, idx + cnt - 1)) - ) # RLE is 1-based index - mask = np.zeros(h * w, dtype=np.uint8) - mask[indices] = 1 - return mask.reshape((w, h)).T - - -def get_annotation_lookup(annotations): - """Get annotations from Label.annotations objects - - Args: - annotations (Label.annotations): Annotations attached to labelbox Label object used as private method - """ - annotation_lookup = defaultdict(list) - for annotation in annotations: - # Provide a default value of None if the attribute doesn't exist - attribute_value = getattr(annotation, "image_id", None) or getattr( - annotation, "name", None - ) - annotation_lookup[attribute_value].append(annotation) - return annotation_lookup - - -class SegmentInfo(BaseModel): - id: int - category_id: int - area: Union[float, int] - bbox: Tuple[float, float, float, float] # [x,y,w,h], - iscrowd: int = 0 - - -class RLE(BaseModel): - counts: List[int] - size: Tuple[int, int] # h,w or w,h? - - -class COCOObjectAnnotation(BaseModel): - # All segmentations for a particular class in an image... - # So each image will have one of these for each class present in the image.. - # Annotations only exist if there is data.. 
- id: int - image_id: int - category_id: int - segmentation: Union[RLE, List[List[float]]] # [[x1,y1,x2,y2,x3,y3...]] - area: float - bbox: Tuple[float, float, float, float] # [x,y,w,h], - iscrowd: int = 0 - - -class PanopticAnnotation(PathSerializerMixin): - # One to one relationship between image and panoptic annotation - image_id: int - file_name: Path - segments_info: List[SegmentInfo] diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/categories.py b/libs/labelbox/src/labelbox/data/serialization/coco/categories.py deleted file mode 100644 index 60ba30fce..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/categories.py +++ /dev/null @@ -1,17 +0,0 @@ -import sys -from hashlib import md5 - -from pydantic import BaseModel - - -class Categories(BaseModel): - id: int - name: str - supercategory: str - isthing: int = 1 - - -def hash_category_name(name: str) -> int: - return int.from_bytes( - md5(name.encode("utf-8")).hexdigest().encode("utf-8"), "little" - ) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/converter.py b/libs/labelbox/src/labelbox/data/serialization/coco/converter.py deleted file mode 100644 index e270b7573..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/converter.py +++ /dev/null @@ -1,170 +0,0 @@ -from typing import Dict, Any, Union -from pathlib import Path -import os -import warnings - -from ...annotation_types.collection import LabelCollection, LabelGenerator -from ...serialization.coco.instance_dataset import CocoInstanceDataset -from ...serialization.coco.panoptic_dataset import CocoPanopticDataset - - -def create_path_if_not_exists( - path: Union[Path, str], ignore_existing_data=False -): - path = Path(path) - if not path.exists(): - path.mkdir(parents=True, exist_ok=True) - elif not ignore_existing_data and os.listdir(path): - raise ValueError( - f"Directory `{path}`` must be empty. Or set `ignore_existing_data=True`" - ) - return path - - -def validate_path(path: Union[Path, str], name: str): - path = Path(path) - if not path.exists(): - raise ValueError(f"{name} `{path}` must exist") - return path - - -class COCOConverter: - """ - Class for converting between coco and labelbox formats - Note that this class is only compatible with image data. - - Subclasses are currently ignored. - To use subclasses, manually flatten them before using the converter. - """ - - @staticmethod - def serialize_instances( - labels: LabelCollection, - image_root: Union[Path, str], - ignore_existing_data=False, - max_workers=8, - ) -> Dict[str, Any]: - """ - Convert a Labelbox LabelCollection into an mscoco dataset. - This function will only convert masks, polygons, and rectangles. - Masks will be converted into individual instances. - Use deserialize_panoptic to prevent masks from being split apart. - - Args: - labels: A collection of labels to convert - image_root: Where to save images to - ignore_existing_data: Whether or not to raise an exception if images already exist. - This exists only to support detectons panoptic fpn model which requires two mscoco payloads for the same images. - max_workers : Number of workers to process dataset with. A value of 0 will process all data in the main process - Returns: - A dictionary containing labels in the coco object format. 
- """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = create_path_if_not_exists(image_root, ignore_existing_data) - return CocoInstanceDataset.from_common( - labels=labels, image_root=image_root, max_workers=max_workers - ).model_dump() - - @staticmethod - def serialize_panoptic( - labels: LabelCollection, - image_root: Union[Path, str], - mask_root: Union[Path, str], - all_stuff: bool = False, - ignore_existing_data=False, - max_workers: int = 8, - ) -> Dict[str, Any]: - """ - Convert a Labelbox LabelCollection into an mscoco dataset. - This function will only convert masks, polygons, and rectangles. - Masks will be converted into individual instances. - Use deserialize_panoptic to prevent masks from being split apart. - - Args: - labels: A collection of labels to convert - image_root: Where to save images to - mask_root: Where to save segmentation masks to - all_stuff: If rectangle or polygon annotations are encountered, they will be treated as instances. - To convert them to stuff class set `all_stuff=True`. - ignore_existing_data: Whether or not to raise an exception if images already exist. - This exists only to support detectons panoptic fpn model which requires two mscoco payloads for the same images. - max_workers : Number of workers to process dataset with. A value of 0 will process all data in the main process. - Returns: - A dictionary containing labels in the coco panoptic format. - """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = create_path_if_not_exists(image_root, ignore_existing_data) - mask_root = create_path_if_not_exists(mask_root, ignore_existing_data) - return CocoPanopticDataset.from_common( - labels=labels, - image_root=image_root, - mask_root=mask_root, - all_stuff=all_stuff, - max_workers=max_workers, - ).model_dump() - - @staticmethod - def deserialize_panoptic( - json_data: Dict[str, Any], - image_root: Union[Path, str], - mask_root: Union[Path, str], - ) -> LabelGenerator: - """ - Convert coco panoptic data into the labelbox format (as a LabelGenerator). - - Args: - json_data: panoptic data as a dict - image_root: Path to local images that are referenced by the panoptic json - mask_root: Path to local segmentation masks that are referenced by the panoptic json - Returns: - LabelGenerator - """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = validate_path(image_root, "image_root") - mask_root = validate_path(mask_root, "mask_root") - objs = CocoPanopticDataset(**json_data) - gen = objs.to_common(image_root, mask_root) - return LabelGenerator(data=gen) - - @staticmethod - def deserialize_instances( - json_data: Dict[str, Any], image_root: Path - ) -> LabelGenerator: - """ - Convert coco object data into the labelbox format (as a LabelGenerator). 
- - Args: - json_data: coco object data as a dict - image_root: Path to local images that are referenced by the coco object json - Returns: - LabelGenerator - """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = validate_path(image_root, "image_root") - objs = CocoInstanceDataset(**json_data) - gen = objs.to_common(image_root) - return LabelGenerator(data=gen) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/image.py b/libs/labelbox/src/labelbox/data/serialization/coco/image.py deleted file mode 100644 index cef173377..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/image.py +++ /dev/null @@ -1,52 +0,0 @@ -from pathlib import Path - -from typing import Optional, Tuple -from PIL import Image -import imagesize - -from .path import PathSerializerMixin -from ...annotation_types import Label - - -class CocoImage(PathSerializerMixin): - id: int - width: int - height: int - file_name: Path - license: Optional[int] = None - flickr_url: Optional[str] = None - coco_url: Optional[str] = None - - -def get_image_id(label: Label, idx: int) -> int: - if label.data.file_path is not None: - file_name = label.data.file_path.replace(".jpg", "") - if file_name.isdecimal(): - return file_name - return idx - - -def get_image(label: Label, image_root: Path, image_id: str) -> CocoImage: - path = Path(image_root, f"{image_id}.jpg") - if not path.exists(): - im = Image.fromarray(label.data.value) - im.save(path) - w, h = im.size - else: - w, h = imagesize.get(str(path)) - return CocoImage(id=image_id, width=w, height=h, file_name=Path(path.name)) - - -def id_to_rgb(id: int) -> Tuple[int, int, int]: - digits = [] - for _ in range(3): - digits.append(id % 256) - id //= 256 - return digits - - -def rgb_to_id(red: int, green: int, blue: int) -> int: - id = blue * 256 * 256 - id += green * 256 - id += red - return id diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py b/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py deleted file mode 100644 index 5241e596f..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py +++ /dev/null @@ -1,266 +0,0 @@ -# https://cocodataset.org/#format-data - -from concurrent.futures import ProcessPoolExecutor, as_completed -from typing import Any, Dict, List, Tuple, Optional -from pathlib import Path - -import numpy as np -from tqdm import tqdm - -from ...annotation_types import ( - ImageData, - MaskData, - Mask, - ObjectAnnotation, - Label, - Polygon, - Point, - Rectangle, -) -from ...annotation_types.collection import LabelCollection -from .categories import Categories, hash_category_name -from .annotation import ( - COCOObjectAnnotation, - RLE, - get_annotation_lookup, - rle_decoding, -) -from .image import CocoImage, get_image, get_image_id -from pydantic import BaseModel - - -def mask_to_coco_object_annotation( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> Optional[COCOObjectAnnotation]: - # This is going to fill any holes into the multipolygon - # If you need to support holes use the panoptic data format - shapely = annotation.value.shapely.simplify(1).buffer(0) - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - # Iterate over polygon once or multiple polygon for each item - area = shapely.area - - return COCOObjectAnnotation( - id=annot_idx, - 
image_id=image_id, - category_id=category_id, - segmentation=[ - np.array(s.exterior.coords).ravel().tolist() - for s in ([shapely] if shapely.type == "Polygon" else shapely.geoms) - ], - area=area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - iscrowd=0, - ) - - -def vector_to_coco_object_annotation( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> COCOObjectAnnotation: - shapely = annotation.value.shapely - xmin, ymin, xmax, ymax = shapely.bounds - segmentation = [] - if isinstance(annotation.value, Polygon): - for point in annotation.value.points: - segmentation.extend([point.x, point.y]) - else: - box = annotation.value - segmentation.extend( - [ - box.start.x, - box.start.y, - box.end.x, - box.start.y, - box.end.x, - box.end.y, - box.start.x, - box.end.y, - ] - ) - - return COCOObjectAnnotation( - id=annot_idx, - image_id=image_id, - category_id=category_id, - segmentation=[segmentation], - area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - iscrowd=0, - ) - - -def rle_to_common( - class_annotations: COCOObjectAnnotation, class_name: str -) -> ObjectAnnotation: - mask = rle_decoding( - class_annotations.segmentation.counts, - *class_annotations.segmentation.size[::-1], - ) - return ObjectAnnotation( - name=class_name, - value=Mask(mask=MaskData.from_2D_arr(mask), color=[1, 1, 1]), - ) - - -def segmentations_to_common( - class_annotations: COCOObjectAnnotation, class_name: str -) -> List[ObjectAnnotation]: - # Technically it is polygons. But the key in coco is called segmentations.. - annotations = [] - for points in class_annotations.segmentation: - annotations.append( - ObjectAnnotation( - name=class_name, - value=Polygon( - points=[ - Point(x=points[i], y=points[i + 1]) - for i in range(0, len(points), 2) - ] - ), - ) - ) - return annotations - - -def object_annotation_to_coco( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> Optional[COCOObjectAnnotation]: - if isinstance(annotation.value, Mask): - return mask_to_coco_object_annotation( - annotation, annot_idx, image_id, category_id - ) - elif isinstance(annotation.value, (Polygon, Rectangle)): - return vector_to_coco_object_annotation( - annotation, annot_idx, image_id, category_id - ) - else: - return None - - -def process_label( - label: Label, idx: int, image_root: str, max_annotations_per_image=10000 -) -> Tuple[np.ndarray, List[COCOObjectAnnotation], Dict[str, str]]: - annot_idx = idx * max_annotations_per_image - image_id = get_image_id(label, idx) - image = get_image(label, image_root, image_id) - coco_annotations = [] - annotation_lookup = get_annotation_lookup(label.annotations) - categories = {} - for class_name in annotation_lookup: - for annotation in annotation_lookup[class_name]: - category_id = categories.get(annotation.name) or hash_category_name( - annotation.name - ) - coco_annotation = object_annotation_to_coco( - annotation, annot_idx, image_id, category_id - ) - if coco_annotation is not None: - coco_annotations.append(coco_annotation) - if annotation.name not in categories: - categories[annotation.name] = category_id - annot_idx += 1 - - return image, coco_annotations, categories - - -class CocoInstanceDataset(BaseModel): - info: Dict[str, Any] = {} - images: List[CocoImage] - annotations: List[COCOObjectAnnotation] - categories: List[Categories] - - @classmethod - def from_common( - cls, labels: LabelCollection, image_root: Path, max_workers=8 - ): - all_coco_annotations = [] - categories = {} - images 
= [] - futures = [] - coco_categories = {} - - if max_workers: - with ProcessPoolExecutor(max_workers=max_workers) as exc: - futures = [ - exc.submit(process_label, label, idx, image_root) - for idx, label in enumerate(labels) - ] - results = [ - future.result() for future in tqdm(as_completed(futures)) - ] - else: - results = [ - process_label(label, idx, image_root) - for idx, label in enumerate(labels) - ] - - for result in results: - images.append(result[0]) - all_coco_annotations.extend(result[1]) - coco_categories.update(result[2]) - - category_mapping = { - category_id: idx + 1 - for idx, category_id in enumerate(coco_categories.values()) - } - categories = [ - Categories( - id=category_mapping[idx], - name=name, - supercategory="all", - isthing=1, - ) - for name, idx in coco_categories.items() - ] - for annot in all_coco_annotations: - annot.category_id = category_mapping[annot.category_id] - - return CocoInstanceDataset( - info={"image_root": image_root}, - images=images, - annotations=all_coco_annotations, - categories=categories, - ) - - def to_common(self, image_root): - category_lookup = { - category.id: category for category in self.categories - } - annotation_lookup = get_annotation_lookup(self.annotations) - - for image in self.images: - im_path = Path(image_root, image.file_name) - if not im_path.exists(): - raise ValueError( - f"Cannot find file {im_path}. Make sure `image_root` is set properly" - ) - - data = ImageData(file_path=str(im_path)) - annotations = [] - for class_annotations in annotation_lookup[image.id]: - if isinstance(class_annotations.segmentation, RLE): - annotations.append( - rle_to_common( - class_annotations, - category_lookup[class_annotations.category_id].name, - ) - ) - elif isinstance(class_annotations.segmentation, list): - annotations.extend( - segmentations_to_common( - class_annotations, - category_lookup[class_annotations.category_id].name, - ) - ) - yield Label(data=data, annotations=annotations) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py b/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py deleted file mode 100644 index cbb410548..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py +++ /dev/null @@ -1,242 +0,0 @@ -from concurrent.futures import ProcessPoolExecutor, as_completed -from typing import Dict, Any, List, Union -from pathlib import Path - -from tqdm import tqdm -import numpy as np -from PIL import Image - -from ...annotation_types.geometry import Polygon, Rectangle -from ...annotation_types import Label -from ...annotation_types.geometry.mask import Mask -from ...annotation_types.annotation import ObjectAnnotation -from ...annotation_types.data.raster import MaskData, ImageData -from ...annotation_types.collection import LabelCollection -from .categories import Categories, hash_category_name -from .image import CocoImage, get_image, get_image_id, id_to_rgb -from .annotation import PanopticAnnotation, SegmentInfo, get_annotation_lookup -from pydantic import BaseModel - - -def vector_to_coco_segment_info( - canvas: np.ndarray, - annotation: ObjectAnnotation, - annotation_idx: int, - image: CocoImage, - category_id: int, -): - shapely = annotation.value.shapely - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - canvas = annotation.value.draw( - height=image.height, - width=image.width, - canvas=canvas, - color=id_to_rgb(annotation_idx), - ) - - return SegmentInfo( - id=annotation_idx, - category_id=category_id, - 
area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - ), canvas - - -def mask_to_coco_segment_info( - canvas: np.ndarray, annotation, annotation_idx: int, category_id -): - color = id_to_rgb(annotation_idx) - mask = annotation.value.draw(color=color) - shapely = annotation.value.shapely - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - canvas = np.where(canvas == (0, 0, 0), mask, canvas) - return SegmentInfo( - id=annotation_idx, - category_id=category_id, - area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - ), canvas - - -def process_label( - label: Label, idx: Union[int, str], image_root, mask_root, all_stuff=False -): - """ - Masks become stuff - Polygon and rectangle become thing - """ - annotations = get_annotation_lookup(label.annotations) - image_id = get_image_id(label, idx) - image = get_image(label, image_root, image_id) - canvas = np.zeros((image.height, image.width, 3)) - - segments = [] - categories = {} - is_thing = {} - - for class_idx, class_name in enumerate(annotations): - for annotation_idx, annotation in enumerate(annotations[class_name]): - categories[annotation.name] = hash_category_name(annotation.name) - if isinstance(annotation.value, Mask): - coco_segment_info = mask_to_coco_segment_info( - canvas, - annotation, - class_idx + 1, - categories[annotation.name], - ) - - if coco_segment_info is None: - # Filter out empty masks - continue - - segment, canvas = coco_segment_info - segments.append(segment) - is_thing[annotation.name] = 0 - - elif isinstance(annotation.value, (Polygon, Rectangle)): - coco_vector_info = vector_to_coco_segment_info( - canvas, - annotation, - annotation_idx=(class_idx if all_stuff else annotation_idx) - + 1, - image=image, - category_id=categories[annotation.name], - ) - - if coco_vector_info is None: - # Filter out empty annotations - continue - - segment, canvas = coco_vector_info - segments.append(segment) - is_thing[annotation.name] = 1 - int(all_stuff) - - mask_file = str(image.file_name).replace(".jpg", ".png") - mask_file = Path(mask_root, mask_file) - Image.fromarray(canvas.astype(np.uint8)).save(mask_file) - return ( - image, - PanopticAnnotation( - image_id=image_id, - file_name=Path(mask_file.name), - segments_info=segments, - ), - categories, - is_thing, - ) - - -class CocoPanopticDataset(BaseModel): - info: Dict[str, Any] = {} - images: List[CocoImage] - annotations: List[PanopticAnnotation] - categories: List[Categories] - - @classmethod - def from_common( - cls, - labels: LabelCollection, - image_root, - mask_root, - all_stuff, - max_workers=8, - ): - all_coco_annotations = [] - coco_categories = {} - coco_things = {} - images = [] - - if max_workers: - with ProcessPoolExecutor(max_workers=max_workers) as exc: - futures = [ - exc.submit( - process_label, - label, - idx, - image_root, - mask_root, - all_stuff, - ) - for idx, label in enumerate(labels) - ] - results = [ - future.result() for future in tqdm(as_completed(futures)) - ] - else: - results = [ - process_label(label, idx, image_root, mask_root, all_stuff) - for idx, label in enumerate(labels) - ] - - for result in results: - images.append(result[0]) - all_coco_annotations.append(result[1]) - coco_categories.update(result[2]) - coco_things.update(result[3]) - - category_mapping = { - category_id: idx + 1 - for idx, category_id in enumerate(coco_categories.values()) - } - categories = [ - Categories( - id=category_mapping[idx], - name=name, - supercategory="all", - isthing=coco_things.get(name, 1), - ) - for 
name, idx in coco_categories.items() - ] - - for annot in all_coco_annotations: - for segment in annot.segments_info: - segment.category_id = category_mapping[segment.category_id] - - return CocoPanopticDataset( - info={"image_root": image_root, "mask_root": mask_root}, - images=images, - annotations=all_coco_annotations, - categories=categories, - ) - - def to_common(self, image_root: Path, mask_root: Path): - category_lookup = { - category.id: category for category in self.categories - } - annotation_lookup = { - annotation.image_id: annotation for annotation in self.annotations - } - for image in self.images: - annotations = [] - annotation = annotation_lookup[image.id] - - im_path = Path(image_root, image.file_name) - if not im_path.exists(): - raise ValueError( - f"Cannot find file {im_path}. Make sure `image_root` is set properly" - ) - if not str(annotation.file_name).endswith(".png"): - raise ValueError( - f"COCO masks must be stored as png files and their extension must be `.png`. Found {annotation.file_name}" - ) - mask = MaskData( - file_path=str(Path(mask_root, annotation.file_name)) - ) - - for segmentation in annotation.segments_info: - category = category_lookup[segmentation.category_id] - annotations.append( - ObjectAnnotation( - name=category.name, - value=Mask(mask=mask, color=id_to_rgb(segmentation.id)), - ) - ) - data = ImageData(file_path=str(im_path)) - yield Label(data=data, annotations=annotations) - del annotation_lookup[image.id] diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/path.py b/libs/labelbox/src/labelbox/data/serialization/coco/path.py deleted file mode 100644 index c3be84f31..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/path.py +++ /dev/null @@ -1,9 +0,0 @@ -from pathlib import Path -from pydantic import BaseModel, model_serializer - - -class PathSerializerMixin(BaseModel): - @model_serializer(mode="wrap") - def serialize_model(self, handler): - res = handler(self) - return {k: str(v) if isinstance(v, Path) else v for k, v in res.items()} diff --git a/libs/labelbox/tests/data/serialization/coco/test_coco.py b/libs/labelbox/tests/data/serialization/coco/test_coco.py deleted file mode 100644 index a7c733ce5..000000000 --- a/libs/labelbox/tests/data/serialization/coco/test_coco.py +++ /dev/null @@ -1,38 +0,0 @@ -import json -from pathlib import Path - -from labelbox.data.serialization.coco import COCOConverter - -COCO_ASSETS_DIR = "tests/data/assets/coco" - - -def run_instances(tmpdir): - instance_json = json.load(open(Path(COCO_ASSETS_DIR, "instances.json"))) - res = COCOConverter.deserialize_instances( - instance_json, Path(COCO_ASSETS_DIR, "images") - ) - back = COCOConverter.serialize_instances( - res, - Path(tmpdir), - ) - - -def test_rle_objects(tmpdir): - rle_json = json.load(open(Path(COCO_ASSETS_DIR, "rle.json"))) - res = COCOConverter.deserialize_instances( - rle_json, Path(COCO_ASSETS_DIR, "images") - ) - back = COCOConverter.serialize_instances(res, tmpdir) - - -def test_panoptic(tmpdir): - panoptic_json = json.load(open(Path(COCO_ASSETS_DIR, "panoptic.json"))) - image_dir, mask_dir = [ - Path(COCO_ASSETS_DIR, dir_name) for dir_name in ["images", "masks"] - ] - res = COCOConverter.deserialize_panoptic(panoptic_json, image_dir, mask_dir) - back = COCOConverter.serialize_panoptic( - res, - Path(f"/{tmpdir}/images_panoptic"), - Path(f"/{tmpdir}/masks_panoptic"), - ) From 9cf28a10ef556c2a674b25c26fc8bf4f75006103 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 
Sep 2024 15:46:16 -0500 Subject: [PATCH 12/35] Fixed video --- libs/labelbox/tests/unit/test_label_data_type.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/libs/labelbox/tests/unit/test_label_data_type.py b/libs/labelbox/tests/unit/test_label_data_type.py index 7bc32e37c..662fa5a5a 100644 --- a/libs/labelbox/tests/unit/test_label_data_type.py +++ b/libs/labelbox/tests/unit/test_label_data_type.py @@ -1,11 +1,7 @@ -from email import message import pytest -from pydantic import ValidationError - from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) -from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.label import Label @@ -42,9 +38,9 @@ def test_video_data_type(): "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", } with pytest.warns(UserWarning, match="Use a dict"): - label = Label(data=VideoData(**data)) + label = Label(data=GenericDataRowData(**data)) data = label.data - assert isinstance(data, VideoData) + assert isinstance(data, GenericDataRowData) assert ( data.global_key == "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr" From 7fc10bb4678000ba6270086b46d06bb8057b6b50 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:51:12 -0500 Subject: [PATCH 13/35] Removed data type test --- libs/labelbox/tests/unit/test_label_data_type.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/libs/labelbox/tests/unit/test_label_data_type.py b/libs/labelbox/tests/unit/test_label_data_type.py index 662fa5a5a..611324f78 100644 --- a/libs/labelbox/tests/unit/test_label_data_type.py +++ b/libs/labelbox/tests/unit/test_label_data_type.py @@ -33,20 +33,6 @@ def test_generic_data_type_validations(): Label(data=data) -def test_video_data_type(): - data = { - "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", - } - with pytest.warns(UserWarning, match="Use a dict"): - label = Label(data=GenericDataRowData(**data)) - data = label.data - assert isinstance(data, GenericDataRowData) - assert ( - data.global_key - == "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr" - ) - - def test_generic_data_row(): data = { "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", From 0b810fbc939556e5724b726613167f5da1921992 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:42:52 -0500 Subject: [PATCH 14/35] Made fix --- .../labelbox/tests/data/annotation_types/test_collection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 8b2627776..f9917cf82 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -95,7 +95,7 @@ def test_adding_schema_ids(): def test_adding_urls(signer): label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[], ) uuid = str(uuid4()) @@ -108,7 +108,7 @@ def test_adding_urls(signer): def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( - data=GenericDataRowData("12345"), + 
data=GenericDataRowData(uid="12345"), annotations=[], ) uuid = str(uuid4()) @@ -123,7 +123,7 @@ def test_adding_to_dataset(signer): def test_adding_to_masks(signer): label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[ ObjectAnnotation( name="1234", From f8b8fa361b0c0996c62700ca3e5c75081c24d2fe Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:53:19 -0500 Subject: [PATCH 15/35] Fix list of labels --- libs/labelbox/tests/data/annotation_types/test_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index f9917cf82..57ba57962 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -20,7 +20,7 @@ @pytest.fixture def list_of_labels(): return [ - Label(data=GenericDataRowData(url="http://someurl")) for _ in range(5) + Label(data=GenericDataRowData(uid="http://someurl")) for _ in range(5) ] From 2329324b93014ef313bac436248fb37c6d96108b Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:08:03 -0500 Subject: [PATCH 16/35] Removed add url --- .../data/annotation_types/collection.py | 20 ------------------- .../labelbox/data/annotation_types/label.py | 13 ------------ .../data/annotation_types/test_collection.py | 14 ------------- 3 files changed, 47 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index d90204309..9eb1fe53e 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -40,26 +40,6 @@ def _assign_ids(label: Label): self._fns["assign_feature_schema_ids"] = _assign_ids return self - def add_url_to_data( - self, signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - LabelGenerator that signs urls as data is accessed - """ - - def _add_url_to_data(label: Label): - label.add_url_to_data(signer) - return label - - self._fns["add_url_to_data"] = _add_url_to_data - return self - def add_to_dataset( self, dataset: "Entity.Dataset", signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 9d5b92bdd..a18460bc1 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -90,19 +90,6 @@ def frame_annotations( frame_dict[annotation.frame].append(annotation) return frame_dict - def add_url_to_data(self, signer) -> "Label": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - Label with updated references to new data url - """ - self.data.create_url(signer) - return self - def add_url_to_masks(self, signer) -> "Label": """ Creates signed urls for all masks in the Label. 
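The surviving hook, `add_url_to_masks`, still takes the same `signer` argument the removed `add_url_to_data` helper did: a callable that accepts raw bytes and returns a signed URL. A minimal sketch of such a callable, assuming a hypothetical `upload_bytes(key, payload)` storage helper that persists the payload and returns its public URL (it is not part of the SDK; swap in your own uploader):

    from uuid import uuid4

    def make_signer(upload_bytes):
        # upload_bytes(key, payload) -> url is an assumed storage helper,
        # not Labelbox API.
        def signer(payload: bytes) -> str:
            # Store the mask bytes under a unique key and return the URL.
            return upload_bytes(f"masks/{uuid4()}.png", payload)

        return signer

    # label.add_url_to_masks(make_signer(upload_bytes)) then uploads each
    # mask in the label and swaps in the URL it gets back.
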
diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 57ba57962..17316f811 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -93,18 +93,6 @@ def test_adding_schema_ids(): assert next(generator).annotations[0].feature_schema_id == feature_schema_id -def test_adding_urls(signer): - label = Label( - data=GenericDataRowData(uid="12345"), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_url_to_data(signer(uuid)) - assert label.data.url != uuid - assert next(generator).data.url == uuid - assert label.data.url == uuid - - def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( @@ -113,9 +101,7 @@ def test_adding_to_dataset(signer): ) uuid = str(uuid4()) generator = LabelGenerator([label]).add_to_dataset(dataset, signer(uuid)) - assert label.data.url != uuid generated_label = next(generator) - assert generated_label.data.url == uuid assert generated_label.data.external_id != None assert generated_label.data.uid == dataset.uid assert label.data.url == uuid From 4457f25bd907c68d9cc2ee6c32eb3000b191dcf2 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:35:51 -0500 Subject: [PATCH 17/35] Removed rest of tests --- .../data/annotation_types/collection.py | 21 ----------------- .../data/annotation_types/data/test_raster.py | 13 +++++------ .../data/annotation_types/test_collection.py | 14 ----------- .../data/annotation_types/test_metrics.py | 15 ++++++++---- .../data/annotation_types/test_tiled_image.py | 23 ------------------- .../serialization/ndjson/test_conversation.py | 10 ++++---- .../data/serialization/ndjson/test_dicom.py | 14 ++++++----- .../serialization/ndjson/test_document.py | 2 +- .../serialization/ndjson/test_free_text.py | 6 ++--- .../data/serialization/ndjson/test_video.py | 15 ++++++------ 10 files changed, 41 insertions(+), 92 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index 9eb1fe53e..2e76176a8 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -40,27 +40,6 @@ def _assign_ids(label: Label): self._fns["assign_feature_schema_ids"] = _assign_ids return self - def add_to_dataset( - self, dataset: "Entity.Dataset", signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates data rows from each labels data object and attaches the data to the given dataset. - Updates the label's data object to have the same external_id and uid as the data row. - - Args: - dataset: labelbox dataset object to add the new data row to - signer: A function that accepts bytes and returns a signed url. 
- Returns: - LabelGenerator that updates references to the new data rows as data is accessed - """ - - def _add_to_dataset(label: Label): - label.create_data_row(dataset, signer) - return label - - self._fns["assign_datarow_ids"] = _add_to_dataset - return self - def add_url_to_masks( self, signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/tests/data/annotation_types/data/test_raster.py b/libs/labelbox/tests/data/annotation_types/data/test_raster.py index 6bc8f2bbf..304ed3e95 100644 --- a/libs/labelbox/tests/data/annotation_types/data/test_raster.py +++ b/libs/labelbox/tests/data/annotation_types/data/test_raster.py @@ -5,26 +5,26 @@ import pytest from PIL import Image -from labelbox.data.annotation_types.data import ImageData +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from pydantic import ValidationError def test_validate_schema(): with pytest.raises(ValidationError): - data = ImageData() + GenericDataRowData() def test_im_bytes(): data = (np.random.random((32, 32, 3)) * 255).astype(np.uint8) im_bytes = BytesIO() Image.fromarray(data).save(im_bytes, format="PNG") - raster_data = ImageData(im_bytes=im_bytes.getvalue()) + raster_data = MaskData(im_bytes=im_bytes.getvalue()) data_ = raster_data.value assert np.all(data == data_) def test_im_url(): - raster_data = ImageData(url="https://picsum.photos/id/829/200/300") + raster_data = GenericDataRowData(url="https://picsum.photos/id/829/200/300") data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -32,7 +32,7 @@ def test_im_url(): def test_im_path(): img_path = "/tmp/img.jpg" urllib.request.urlretrieve("https://picsum.photos/id/829/200/300", img_path) - raster_data = ImageData(file_path=img_path) + raster_data = GenericDataRowData(file_path=img_path) data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -42,8 +42,7 @@ def test_ref(): uid = "uid" metadata = [] media_attributes = {} - data = ImageData( - im_bytes=b"", + data = GenericDataRowData( external_id=external_id, uid=uid, metadata=metadata, diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 17316f811..f818b94ff 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -93,20 +93,6 @@ def test_adding_schema_ids(): assert next(generator).annotations[0].feature_schema_id == feature_schema_id -def test_adding_to_dataset(signer): - dataset = FakeDataset() - label = Label( - data=GenericDataRowData(uid="12345"), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_to_dataset(dataset, signer(uuid)) - generated_label = next(generator) - assert generated_label.data.external_id != None - assert generated_label.data.uid == dataset.uid - assert label.data.url == uuid - - def test_adding_to_masks(signer): label = Label( data=GenericDataRowData(uid="12345"), diff --git a/libs/labelbox/tests/data/annotation_types/test_metrics.py b/libs/labelbox/tests/data/annotation_types/test_metrics.py index 94c9521a5..4e9355573 100644 --- a/libs/labelbox/tests/data/annotation_types/test_metrics.py +++ b/libs/labelbox/tests/data/annotation_types/test_metrics.py @@ -8,7 +8,11 @@ ConfusionMatrixMetric, ScalarMetric, ) -from labelbox.data.annotation_types import ScalarMetric, Label, ImageData +from labelbox.data.annotation_types import ( + ScalarMetric, + Label, + GenericDataRowData, +) from 
labelbox.data.annotation_types.metrics.scalar import RESERVED_METRIC_NAMES from pydantic import ValidationError @@ -19,7 +23,8 @@ def test_legacy_scalar_metric(): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -72,7 +77,8 @@ def test_custom_scalar_metric(feature_name, subclass_name, aggregation, value): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -134,7 +140,8 @@ def test_custom_confusison_matrix_metric( assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { diff --git a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py index 46f2383d6..9b96c9445 100644 --- a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py +++ b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py @@ -6,8 +6,6 @@ from labelbox.data.annotation_types.data.tiled_image import ( EPSG, TiledBounds, - TileLayer, - TiledImageData, EPSGTransformer, ) from pydantic import ValidationError @@ -37,27 +35,6 @@ def test_tiled_bounds_same(epsg): ) -def test_create_tiled_image_data(): - bounds_points = [Point(x=0, y=0), Point(x=5, y=5)] - url = ( - "https://labelbox.s3-us-west-2.amazonaws.com/pathology/{z}/{x}/{y}.png" - ) - zoom_levels = (1, 10) - - tile_layer = TileLayer(url=url, name="slippy map tile") - tile_bounds = TiledBounds(epsg=EPSG.EPSG4326, bounds=bounds_points) - tiled_image_data = TiledImageData( - tile_layer=tile_layer, - tile_bounds=tile_bounds, - zoom_levels=zoom_levels, - version=2, - ) - assert isinstance(tiled_image_data, TiledImageData) - assert tiled_image_data.tile_bounds.bounds == bounds_points - assert tiled_image_data.tile_layer.url == url - assert tiled_image_data.zoom_levels == zoom_levels - - def test_epsg_point_projections(): zoom = 4 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 561f9ce86..5aa7285e2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -19,7 +19,7 @@ radio_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="radio", @@ -48,7 +48,7 @@ checklist_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="checklist", @@ -78,7 +78,7 @@ ] free_text_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -164,7 +164,7 @@ def test_conversation_entity_import_without_confidence(): def test_benchmark_reference_label_flag_enabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), 
+ data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -181,7 +181,7 @@ def test_benchmark_reference_label_flag_enabled(): def test_benchmark_reference_label_flag_disabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 762891aa2..6a00fa871 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -31,7 +31,7 @@ ] polyline_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), + data=lb_types.GenericDataRowData(uid="test-uid"), annotations=dicom_polyline_annotations, ) @@ -58,7 +58,7 @@ } polyline_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=dicom_polyline_annotations, ) @@ -109,11 +109,12 @@ } video_mask_label = lb_types.Label( - data=lb_types.VideoData(uid="test-uid"), annotations=[video_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[video_mask_annotation], ) video_mask_label_with_global_key = lb_types.Label( - data=lb_types.VideoData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[video_mask_annotation], ) """ @@ -128,11 +129,12 @@ ) dicom_mask_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), annotations=[dicom_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[dicom_mask_annotation], ) dicom_mask_label_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[dicom_mask_annotation], ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index a0897ad9f..fcdf4368b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -26,7 +26,7 @@ ) bbox_labels = [ lb_types.Label( - data=lb_types.DocumentData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[bbox_annotation], ) ] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 349be13a8..7b03a8447 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -5,7 +5,7 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -14,7 +14,7 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), @@ -38,7 +38,7 @@ def test_serialization(): def test_nested_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + 
data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 4fba5c2ca..6c14343a4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -6,11 +6,10 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.geometry.line import Line from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.geometry.rectangle import Rectangle -from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.video import ( @@ -28,7 +27,7 @@ def test_video(): labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( feature_schema_id="ckrb1sfjx099a0y914hl319ie", @@ -304,7 +303,7 @@ def test_video_name_only(): data = json.load(file) labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( name="question 1", @@ -574,7 +573,7 @@ def test_video_name_only(): def test_video_classification_global_subclassifications(): label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=[ @@ -790,7 +789,7 @@ def test_video_classification_nesting_bbox(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -940,7 +939,7 @@ def test_video_classification_point(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -1108,7 +1107,7 @@ def test_video_classification_frameline(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, From f5b6c7d33bff71659f125107cae86ac62b3a4434 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:15:43 -0500 Subject: [PATCH 18/35] Fix tests --- .../tests/data/annotation_types/test_label.py | 20 +++++++++++-------- .../test_export_video_streamable.py | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_label.py b/libs/labelbox/tests/data/annotation_types/test_label.py index 5bdfb6bde..8439837ed 100644 --- a/libs/labelbox/tests/data/annotation_types/test_label.py +++ b/libs/labelbox/tests/data/annotation_types/test_label.py @@ -17,7 +17,7 @@ ObjectAnnotation, Point, Line, - ImageData, + MaskData, Label, ) import pytest @@ -26,7 +26,9 @@ def test_schema_assignment_geometry(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -51,7 +53,7 @@ def test_schema_assignment_classification(): option_name = "my_option" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), 
annotations=[ ClassificationAnnotation( value=Radio(answer=ClassificationAnswer(name=option_name)), @@ -102,7 +104,7 @@ def test_schema_assignment_subclass(): value=Radio(answer=ClassificationAnswer(name=option_name)), ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -167,7 +169,9 @@ def test_highly_nested(): ], ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -230,7 +234,7 @@ def test_highly_nested(): def test_schema_assignment_confidence(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line( @@ -252,10 +256,10 @@ def test_initialize_label_no_coercion(): value=lb_types.ConversationEntity(start=0, end=8, message_id="4"), ) label = Label( - data=lb_types.ConversationData(global_key=global_key), + data=lb_types.GenericDataRowData(global_key=global_key), annotations=[ner_annotation], ) - assert isinstance(label.data, lb_types.ConversationData) + assert isinstance(label.data, lb_types.GenericDataRowData) assert label.data.global_key == global_key diff --git a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py index 115194a58..28ef6e0cf 100644 --- a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py +++ b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py @@ -4,7 +4,7 @@ import labelbox as lb import labelbox.types as lb_types -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.schema.annotation_import import AnnotationImportState from labelbox.schema.export_task import ExportTask, StreamType @@ -41,7 +41,7 @@ def test_export( for data_row_uid in data_row_uids: labels = [ lb_types.Label( - data=VideoData(uid=data_row_uid), + data=GenericDataRowData(uid=data_row_uid), annotations=bbox_video_annotation_objects, ) ] From ce60b24be86fa8f60b9aad0372757f96ee2007ed Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:16:05 -0500 Subject: [PATCH 19/35] Finish PR --- libs/labelbox/src/labelbox/data/annotation_types/label.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index a18460bc1..8ae05f898 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -3,9 +3,7 @@ import warnings import labelbox -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from labelbox.schema import ontology from ...annotated_types import Cuid @@ -19,7 +17,7 @@ from .video import VideoObjectAnnotation, VideoMaskAnnotation from .mmc import MessageEvaluationTaskAnnotation from ..ontology import get_feature_schema_lookup -from pydantic import 
BaseModel, field_validator, model_serializer +from pydantic import BaseModel, field_validator class Label(BaseModel): @@ -43,7 +41,7 @@ class Label(BaseModel): """ uid: Optional[Cuid] = None - data: GenericDataRowData + data: Union[GenericDataRowData, MaskData] annotations: List[ Union[ ClassificationAnnotation, From 12aa8c56aaa63ec49400d21c9432b2f057689455 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:44:14 -0500 Subject: [PATCH 20/35] Added back in tile data since some of its parameters are required --- .../data/annotation_types/data/tiled_image.py | 294 ++++++++++++++++++ .../data/annotation_types/test_tiled_image.py | 23 ++ 2 files changed, 317 insertions(+) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py index cdb7f4127..adb8db549 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py @@ -88,6 +88,300 @@ def validate_bounds_lat_lng(self): return self +class TileLayer(BaseModel): + """Url that contains the tile layer. Must be in the format: + + https://c.tile.openstreetmap.org/{z}/{x}/{y}.png + + >>> layer = TileLayer( + url="https://c.tile.openstreetmap.org/{z}/{x}/{y}.png", + name="slippy map tile" + ) + """ + + url: str + name: Optional[str] = "default" + + def asdict(self) -> Dict[str, str]: + return {"tileLayerUrl": self.url, "name": self.name} + + @field_validator("url") + def validate_url(cls, url): + xyz_format = "/{z}/{x}/{y}" + if xyz_format not in url: + raise ValueError(f"{url} needs to contain {xyz_format}") + return url + + +class TiledImageData(BaseData): + """Represents tiled imagery + + If specified version is 2, converts bounds from [lng,lat] to [lat,lng] + + Requires the following args: + tile_layer: TileLayer + tile_bounds: TiledBounds + zoom_levels: List[int] + Optional args: + max_native_zoom: int = None + tile_size: Optional[int] + version: int = 2 + alternative_layers: List[TileLayer] + + >>> tiled_image_data = TiledImageData(tile_layer=TileLayer, + tile_bounds=TiledBounds, + zoom_levels=[1, 12]) + """ + + tile_layer: TileLayer + tile_bounds: TiledBounds + alternative_layers: List[TileLayer] = [] + zoom_levels: Tuple[int, int] + max_native_zoom: Optional[int] = None + tile_size: Optional[int] = DEFAULT_TMS_TILE_SIZE + version: Optional[int] = 2 + multithread: bool = True + + def __post_init__(self) -> None: + if self.max_native_zoom is None: + self.max_native_zoom = self.zoom_levels[0] + + def asdict(self) -> Dict[str, str]: + return { + "tileLayerUrl": self.tile_layer.url, + "bounds": [ + [self.tile_bounds.bounds[0].x, self.tile_bounds.bounds[0].y], + [self.tile_bounds.bounds[1].x, self.tile_bounds.bounds[1].y], + ], + "minZoom": self.zoom_levels[0], + "maxZoom": self.zoom_levels[1], + "maxNativeZoom": self.max_native_zoom, + "epsg": self.tile_bounds.epsg.name, + "tileSize": self.tile_size, + "alternativeLayers": [ + layer.asdict() for layer in self.alternative_layers + ], + "version": self.version, + } + + def raster_data( + self, zoom: int = 0, max_tiles: int = 32, multithread=True + ) -> RasterData: + """Converts the tiled image asset into a RasterData object containing an + np.ndarray. + + Uses the minimum zoom provided to render the image. 
+ """ + if self.tile_bounds.epsg == EPSG.SIMPLEPIXEL: + xstart, ystart, xend, yend = self._get_simple_image_params(zoom) + elif self.tile_bounds.epsg == EPSG.EPSG4326: + xstart, ystart, xend, yend = self._get_3857_image_params( + zoom, self.tile_bounds + ) + elif self.tile_bounds.epsg == EPSG.EPSG3857: + # transform to 4326 + transformer = EPSGTransformer.create_geo_to_geo_transformer( + EPSG.EPSG3857, EPSG.EPSG4326 + ) + transforming_bounds = [ + transformer(self.tile_bounds.bounds[0]), + transformer(self.tile_bounds.bounds[1]), + ] + xstart, ystart, xend, yend = self._get_3857_image_params( + zoom, transforming_bounds + ) + else: + raise ValueError(f"Unsupported epsg found: {self.tile_bounds.epsg}") + + self._validate_num_tiles(xstart, ystart, xend, yend, max_tiles) + + rounded_tiles, pixel_offsets = list( + zip( + *[ + self._tile_to_pixel(pt) + for pt in [xstart, ystart, xend, yend] + ] + ) + ) + + image = self._fetch_image_for_bounds(*rounded_tiles, zoom, multithread) + arr = self._crop_to_bounds(image, *pixel_offsets) + return RasterData(arr=arr) + + @property + def value(self) -> np.ndarray: + """Returns the value of a generated RasterData object.""" + return self.raster_data( + self.zoom_levels[0], multithread=self.multithread + ).value + + def _get_simple_image_params( + self, zoom + ) -> Tuple[float, float, float, float]: + """Computes the x and y tile bounds for fetching an image that + captures the entire labeling region (TiledData.bounds) given a specific zoom + + Simple has different order of x / y than lat / lng because of how leaflet behaves + leaflet reports all points as pixel locations at a zoom of 0 + """ + xend, xstart, yend, ystart = ( + self.tile_bounds.bounds[1].x, + self.tile_bounds.bounds[0].x, + self.tile_bounds.bounds[1].y, + self.tile_bounds.bounds[0].y, + ) + return ( + *[ + x * (2 ** (zoom)) / self.tile_size + for x in [xstart, ystart, xend, yend] + ], + ) + + def _get_3857_image_params( + self, zoom: int, bounds: TiledBounds + ) -> Tuple[float, float, float, float]: + """Computes the x and y tile bounds for fetching an image that + captures the entire labeling region (TiledData.bounds) given a specific zoom + """ + lat_start, lat_end = bounds.bounds[1].y, bounds.bounds[0].y + lng_start, lng_end = bounds.bounds[1].x, bounds.bounds[0].x + + # Convert to zoom 0 tile coordinates + xstart, ystart = self._latlng_to_tile(lat_start, lng_start) + xend, yend = self._latlng_to_tile(lat_end, lng_end) + + # Make sure that the tiles are increasing in order + xstart, xend = min(xstart, xend), max(xstart, xend) + ystart, yend = min(ystart, yend), max(ystart, yend) + return (*[pt * 2.0**zoom for pt in [xstart, ystart, xend, yend]],) + + def _latlng_to_tile( + self, lat: float, lng: float, zoom=0 + ) -> Tuple[float, float]: + """Converts lat/lng to 3857 tile coordinates + Formula found here: + https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#lon.2Flat_to_tile_numbers_2 + """ + scale = 2**zoom + lat_rad = math.radians(lat) + x = (lng + 180.0) / 360.0 * scale + y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale + return x, y + + def _tile_to_pixel(self, tile: float) -> Tuple[int, int]: + """Rounds a tile coordinate and reports the remainder in pixels""" + rounded_tile = int(tile) + remainder = tile - rounded_tile + pixel_offset = int(self.tile_size * remainder) + return rounded_tile, pixel_offset + + def _fetch_image_for_bounds( + self, + x_tile_start: int, + y_tile_start: int, + x_tile_end: int, + y_tile_end: int, + zoom: int, + multithread=True, + ) -> 
np.ndarray: + """Fetches the tiles and combines them into a single image. + + If a tile cannot be fetched, a padding of expected tile size is instead added. + """ + + if multithread: + tiles = {} + with ThreadPoolExecutor( + max_workers=TILE_DOWNLOAD_CONCURRENCY + ) as exc: + for x in range(x_tile_start, x_tile_end + 1): + for y in range(y_tile_start, y_tile_end + 1): + tiles[(x, y)] = exc.submit(self._fetch_tile, x, y, zoom) + + rows = [] + for y in range(y_tile_start, y_tile_end + 1): + row = [] + for x in range(x_tile_start, x_tile_end + 1): + try: + if multithread: + row.append(tiles[(x, y)].result()) + else: + row.append(self._fetch_tile(x, y, zoom)) + except: + row.append( + np.zeros( + shape=(self.tile_size, self.tile_size, 3), + dtype=np.uint8, + ) + ) + rows.append(np.hstack(row)) + + return np.vstack(rows) + + @retry.Retry(initial=1, maximum=16, multiplier=2) + def _fetch_tile(self, x: int, y: int, z: int) -> np.ndarray: + """ + Fetches the image and returns an np array. + """ + data = requests.get(self.tile_layer.url.format(x=x, y=y, z=z)) + data.raise_for_status() + decoded = np.array(Image.open(BytesIO(data.content)))[..., :3] + if decoded.shape[:2] != (self.tile_size, self.tile_size): + logger.warning(f"Unexpected tile size {decoded.shape}.") + return decoded + + def _crop_to_bounds( + self, + image: np.ndarray, + x_px_start: int, + y_px_start: int, + x_px_end: int, + y_px_end: int, + ) -> np.ndarray: + """This function slices off the excess pixels that are outside of the bounds. + This occurs because only full tiles can be downloaded at a time. + """ + + def invert_point(pt): + # Must have at least 1 pixel for stability. + pt = max(pt, 1) + # All pixel points are relative to a single tile + # So subtracting the tile size inverts the axis + pt = pt - self.tile_size + return pt if pt != 0 else None + + x_px_end, y_px_end = invert_point(x_px_end), invert_point(y_px_end) + return image[y_px_start:y_px_end, x_px_start:x_px_end, :] + + def _validate_num_tiles( + self, + xstart: float, + ystart: float, + xend: float, + yend: float, + max_tiles: int, + ): + """Calculates the number of expected tiles we would fetch. + + If this is greater than the number of max tiles, raise an error. + """ + total_n_tiles = (yend - ystart + 1) * (xend - xstart + 1) + if total_n_tiles > max_tiles: + raise ValueError( + f"Requested zoom results in {total_n_tiles} tiles." + f"Max allowed tiles are {max_tiles}" + f"Increase max tiles or reduce zoom level." + ) + + @field_validator("zoom_levels") + def validate_zoom_levels(cls, zoom_levels): + if zoom_levels[0] > zoom_levels[1]: + raise ValueError( + f"Order of zoom levels should be min, max. Received {zoom_levels}" + ) + return zoom_levels + + class EPSGTransformer(BaseModel): """Transformer class between different EPSG's. Useful when wanting to project in different formats. 
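A minimal sketch of how the restored classes fit together (not part of the diff; the tile URL, bounds, and zoom range below are illustrative placeholders, and import paths are assumed from the hunk above):

from labelbox.data.annotation_types import Point
from labelbox.data.annotation_types.data.tiled_image import (
    EPSG,
    TiledBounds,
    TiledImageData,
    TileLayer,
)

# validate_url rejects tile URLs missing the /{z}/{x}/{y} template.
layer = TileLayer(url="https://example.com/tiles/{z}/{x}/{y}.png")

# EPSG4326 bounds are validated against lat/lng ranges.
bounds = TiledBounds(
    epsg=EPSG.EPSG4326, bounds=[Point(x=0, y=0), Point(x=5, y=5)]
)

# validate_zoom_levels enforces (min, max) ordering.
tiled = TiledImageData(
    tile_layer=layer, tile_bounds=bounds, zoom_levels=(1, 10)
)

# raster_data() fetches and stitches tiles over the network, so it is left
# commented out here; max_tiles caps how many tiles one call may request.
# arr = tiled.raster_data(zoom=1, max_tiles=32).value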
diff --git a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py index 9b96c9445..46f2383d6 100644 --- a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py +++ b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py @@ -6,6 +6,8 @@ from labelbox.data.annotation_types.data.tiled_image import ( EPSG, TiledBounds, + TileLayer, + TiledImageData, EPSGTransformer, ) from pydantic import ValidationError @@ -35,6 +37,27 @@ def test_tiled_bounds_same(epsg): ) +def test_create_tiled_image_data(): + bounds_points = [Point(x=0, y=0), Point(x=5, y=5)] + url = ( + "https://labelbox.s3-us-west-2.amazonaws.com/pathology/{z}/{x}/{y}.png" + ) + zoom_levels = (1, 10) + + tile_layer = TileLayer(url=url, name="slippy map tile") + tile_bounds = TiledBounds(epsg=EPSG.EPSG4326, bounds=bounds_points) + tiled_image_data = TiledImageData( + tile_layer=tile_layer, + tile_bounds=tile_bounds, + zoom_levels=zoom_levels, + version=2, + ) + assert isinstance(tiled_image_data, TiledImageData) + assert tiled_image_data.tile_bounds.bounds == bounds_points + assert tiled_image_data.tile_layer.url == url + assert tiled_image_data.zoom_levels == zoom_levels + + def test_epsg_point_projections(): zoom = 4 From b614cedc41ea07dd35cc69c5e777ad7206a2407e Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:58:36 -0500 Subject: [PATCH 21/35] Removed data types besides generic data row data --- .../data/annotation_types/__init__.py | 14 +- .../data/annotation_types/data/__init__.py | 12 +- .../data/annotation_types/data/audio.py | 7 - .../annotation_types/data/conversation.py | 7 - .../data/annotation_types/data/dicom.py | 7 - .../data/annotation_types/data/document.py | 7 - .../data/annotation_types/data/html.py | 7 - .../data/llm_prompt_creation.py | 7 - .../data/llm_prompt_response_creation.py | 9 - .../data/llm_response_creation.py | 7 - .../data/annotation_types/data/raster.py | 7 +- .../data/annotation_types/data/text.py | 116 ------- .../data/annotation_types/data/tiled_image.py | 294 ------------------ .../data/annotation_types/data/video.py | 173 ----------- .../labelbox/data/annotation_types/label.py | 42 +-- .../serialization/ndjson/classification.py | 12 +- .../data/serialization/ndjson/label.py | 40 +-- .../data/serialization/ndjson/metric.py | 8 +- .../labelbox/data/serialization/ndjson/mmc.py | 3 +- .../data/serialization/ndjson/objects.py | 29 +- .../data/serialization/ndjson/relationship.py | 4 +- libs/labelbox/src/labelbox/utils.py | 4 +- .../data/annotation_types/test_collection.py | 16 +- .../serialization/ndjson/test_checklist.py | 14 +- .../data/serialization/ndjson/test_image.py | 3 +- .../data/serialization/ndjson/test_radio.py | 8 +- .../data/serialization/ndjson/test_text.py | 5 +- 27 files changed, 59 insertions(+), 803 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/audio.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/document.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/html.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py delete mode 100644 
libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/text.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/video.py diff --git a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py index 7908bc242..84d6d65a5 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py @@ -32,18 +32,8 @@ from .classification import Radio from .classification import Text -from .data import AudioData -from .data import ConversationData -from .data import DicomData -from .data import DocumentData -from .data import HTMLData -from .data import ImageData +from .data import GenericDataRowData from .data import MaskData -from .data import TextData -from .data import VideoData -from .data import LlmPromptResponseCreationData -from .data import LlmPromptCreationData -from .data import LlmResponseCreationData from .label import Label from .collection import LabelGenerator @@ -58,8 +48,6 @@ from .data.tiled_image import EPSG from .data.tiled_image import EPSGTransformer from .data.tiled_image import TiledBounds -from .data.tiled_image import TiledImageData -from .data.tiled_image import TileLayer from .llm_prompt_response.prompt import PromptText from .llm_prompt_response.prompt import PromptClassificationAnnotation diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py index 2522b2741..8d5e7289b 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py @@ -1,12 +1,2 @@ -from .audio import AudioData -from .conversation import ConversationData -from .dicom import DicomData -from .document import DocumentData -from .html import HTMLData -from .raster import ImageData from .raster import MaskData -from .text import TextData -from .video import VideoData -from .llm_prompt_response_creation import LlmPromptResponseCreationData -from .llm_prompt_creation import LlmPromptCreationData -from .llm_response_creation import LlmResponseCreationData +from .generic_data_row_data import GenericDataRowData diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py b/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py deleted file mode 100644 index 916fca99d..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class AudioData(BaseData, _NoCoercionMixin): - class_name: Literal["AudioData"] = "AudioData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py deleted file mode 100644 index ef6507dca..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class ConversationData(BaseData, _NoCoercionMixin): - class_name: Literal["ConversationData"] = "ConversationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py 
b/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py deleted file mode 100644 index ae4c377dc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DicomData(BaseData, _NoCoercionMixin): - class_name: Literal["DicomData"] = "DicomData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py b/libs/labelbox/src/labelbox/data/annotation_types/data/document.py deleted file mode 100644 index 810a3ed3e..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DocumentData(BaseData, _NoCoercionMixin): - class_name: Literal["DocumentData"] = "DocumentData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py b/libs/labelbox/src/labelbox/data/annotation_types/data/html.py deleted file mode 100644 index 7a78fcb7b..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class HTMLData(BaseData, _NoCoercionMixin): - class_name: Literal["HTMLData"] = "HTMLData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py deleted file mode 100644 index a1b0450bc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptCreationData"] = "LlmPromptCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py deleted file mode 100644 index a8dfce894..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py +++ /dev/null @@ -1,9 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptResponseCreationData"] = ( - "LlmPromptResponseCreationData" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py deleted file mode 100644 index a8963ed3f..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmResponseCreationData"] = "LlmResponseCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py index cfdc4e2f1..3a4e8bb6e 100644 --- 
a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py @@ -11,8 +11,10 @@ from requests.exceptions import ConnectTimeout from typing_extensions import Literal +from pydantic import BaseModel, model_validator, ConfigDict +from labelbox.exceptions import InternalServerError + from ..types import TypedArray -from .base_data import BaseData class RasterData(BaseModel, ABC): @@ -222,6 +224,3 @@ class MaskData(RasterData): url: Optional[str] = None arr: Optional[TypedArray[Literal['uint8']]] = None """ - - -class ImageData(RasterData, BaseData): ... diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py b/libs/labelbox/src/labelbox/data/annotation_types/data/text.py deleted file mode 100644 index cabad4836..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py +++ /dev/null @@ -1,116 +0,0 @@ -from typing import Callable, Optional - -import requests -from google.api_core import retry -from lbox.exceptions import InternalServerError -from pydantic import ConfigDict, model_validator -from requests.exceptions import ConnectTimeout - -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin - -from .base_data import BaseData - - -class TextData(BaseData, _NoCoercionMixin): - """ - Represents text data. Requires arg file_path, text, or url - - >>> TextData(text="") - - Args: - file_path (str) - text (str) - url (str) - """ - - class_name: Literal["TextData"] = "TextData" - file_path: Optional[str] = None - text: Optional[str] = None - url: Optional[str] = None - model_config = ConfigDict(extra="forbid") - - @property - def value(self) -> str: - """ - Property that unifies the data access pattern for all references to the text. - - Returns: - string representation of the text - """ - if self.text: - return self.text - elif self.file_path: - with open(self.file_path, "r") as file: - text = file.read() - self.text = text - return text - elif self.url: - text = self.fetch_remote() - self.text = text - return text - else: - raise ValueError("Must set either url, file_path or im_bytes") - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry( - deadline=15.0, - predicate=retry.if_exception_type(ConnectTimeout, InternalServerError), - ) - def fetch_remote(self) -> str: - """ - Method for accessing url. - - If url is not publicly accessible or requires another access pattern - simply override this function - """ - response = requests.get(self.url) - if response.status_code in [500, 502, 503, 504]: - raise InternalServerError(response.text) - response.raise_for_status() - return response.text - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other text references. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - url for the text - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.text is not None: - self.url = signer(self.text.encode()) - else: - raise ValueError( - "One of url, im_bytes, file_path, numpy must not be None." 
- ) - return self.url - - @model_validator(mode="after") - def validate_date(self, values): - file_path = self.file_path - text = self.text - url = self.url - uid = self.uid - global_key = self.global_key - if uid == file_path == text == url == global_key is None: - raise ValueError( - "One of `file_path`, `text`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"TextData(file_path={self.file_path}," - f"text={self.text[:30] + '...' if self.text is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py index adb8db549..cdb7f4127 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py @@ -88,300 +88,6 @@ def validate_bounds_lat_lng(self): return self -class TileLayer(BaseModel): - """Url that contains the tile layer. Must be in the format: - - https://c.tile.openstreetmap.org/{z}/{x}/{y}.png - - >>> layer = TileLayer( - url="https://c.tile.openstreetmap.org/{z}/{x}/{y}.png", - name="slippy map tile" - ) - """ - - url: str - name: Optional[str] = "default" - - def asdict(self) -> Dict[str, str]: - return {"tileLayerUrl": self.url, "name": self.name} - - @field_validator("url") - def validate_url(cls, url): - xyz_format = "/{z}/{x}/{y}" - if xyz_format not in url: - raise ValueError(f"{url} needs to contain {xyz_format}") - return url - - -class TiledImageData(BaseData): - """Represents tiled imagery - - If specified version is 2, converts bounds from [lng,lat] to [lat,lng] - - Requires the following args: - tile_layer: TileLayer - tile_bounds: TiledBounds - zoom_levels: List[int] - Optional args: - max_native_zoom: int = None - tile_size: Optional[int] - version: int = 2 - alternative_layers: List[TileLayer] - - >>> tiled_image_data = TiledImageData(tile_layer=TileLayer, - tile_bounds=TiledBounds, - zoom_levels=[1, 12]) - """ - - tile_layer: TileLayer - tile_bounds: TiledBounds - alternative_layers: List[TileLayer] = [] - zoom_levels: Tuple[int, int] - max_native_zoom: Optional[int] = None - tile_size: Optional[int] = DEFAULT_TMS_TILE_SIZE - version: Optional[int] = 2 - multithread: bool = True - - def __post_init__(self) -> None: - if self.max_native_zoom is None: - self.max_native_zoom = self.zoom_levels[0] - - def asdict(self) -> Dict[str, str]: - return { - "tileLayerUrl": self.tile_layer.url, - "bounds": [ - [self.tile_bounds.bounds[0].x, self.tile_bounds.bounds[0].y], - [self.tile_bounds.bounds[1].x, self.tile_bounds.bounds[1].y], - ], - "minZoom": self.zoom_levels[0], - "maxZoom": self.zoom_levels[1], - "maxNativeZoom": self.max_native_zoom, - "epsg": self.tile_bounds.epsg.name, - "tileSize": self.tile_size, - "alternativeLayers": [ - layer.asdict() for layer in self.alternative_layers - ], - "version": self.version, - } - - def raster_data( - self, zoom: int = 0, max_tiles: int = 32, multithread=True - ) -> RasterData: - """Converts the tiled image asset into a RasterData object containing an - np.ndarray. - - Uses the minimum zoom provided to render the image. 
- """ - if self.tile_bounds.epsg == EPSG.SIMPLEPIXEL: - xstart, ystart, xend, yend = self._get_simple_image_params(zoom) - elif self.tile_bounds.epsg == EPSG.EPSG4326: - xstart, ystart, xend, yend = self._get_3857_image_params( - zoom, self.tile_bounds - ) - elif self.tile_bounds.epsg == EPSG.EPSG3857: - # transform to 4326 - transformer = EPSGTransformer.create_geo_to_geo_transformer( - EPSG.EPSG3857, EPSG.EPSG4326 - ) - transforming_bounds = [ - transformer(self.tile_bounds.bounds[0]), - transformer(self.tile_bounds.bounds[1]), - ] - xstart, ystart, xend, yend = self._get_3857_image_params( - zoom, transforming_bounds - ) - else: - raise ValueError(f"Unsupported epsg found: {self.tile_bounds.epsg}") - - self._validate_num_tiles(xstart, ystart, xend, yend, max_tiles) - - rounded_tiles, pixel_offsets = list( - zip( - *[ - self._tile_to_pixel(pt) - for pt in [xstart, ystart, xend, yend] - ] - ) - ) - - image = self._fetch_image_for_bounds(*rounded_tiles, zoom, multithread) - arr = self._crop_to_bounds(image, *pixel_offsets) - return RasterData(arr=arr) - - @property - def value(self) -> np.ndarray: - """Returns the value of a generated RasterData object.""" - return self.raster_data( - self.zoom_levels[0], multithread=self.multithread - ).value - - def _get_simple_image_params( - self, zoom - ) -> Tuple[float, float, float, float]: - """Computes the x and y tile bounds for fetching an image that - captures the entire labeling region (TiledData.bounds) given a specific zoom - - Simple has different order of x / y than lat / lng because of how leaflet behaves - leaflet reports all points as pixel locations at a zoom of 0 - """ - xend, xstart, yend, ystart = ( - self.tile_bounds.bounds[1].x, - self.tile_bounds.bounds[0].x, - self.tile_bounds.bounds[1].y, - self.tile_bounds.bounds[0].y, - ) - return ( - *[ - x * (2 ** (zoom)) / self.tile_size - for x in [xstart, ystart, xend, yend] - ], - ) - - def _get_3857_image_params( - self, zoom: int, bounds: TiledBounds - ) -> Tuple[float, float, float, float]: - """Computes the x and y tile bounds for fetching an image that - captures the entire labeling region (TiledData.bounds) given a specific zoom - """ - lat_start, lat_end = bounds.bounds[1].y, bounds.bounds[0].y - lng_start, lng_end = bounds.bounds[1].x, bounds.bounds[0].x - - # Convert to zoom 0 tile coordinates - xstart, ystart = self._latlng_to_tile(lat_start, lng_start) - xend, yend = self._latlng_to_tile(lat_end, lng_end) - - # Make sure that the tiles are increasing in order - xstart, xend = min(xstart, xend), max(xstart, xend) - ystart, yend = min(ystart, yend), max(ystart, yend) - return (*[pt * 2.0**zoom for pt in [xstart, ystart, xend, yend]],) - - def _latlng_to_tile( - self, lat: float, lng: float, zoom=0 - ) -> Tuple[float, float]: - """Converts lat/lng to 3857 tile coordinates - Formula found here: - https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#lon.2Flat_to_tile_numbers_2 - """ - scale = 2**zoom - lat_rad = math.radians(lat) - x = (lng + 180.0) / 360.0 * scale - y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale - return x, y - - def _tile_to_pixel(self, tile: float) -> Tuple[int, int]: - """Rounds a tile coordinate and reports the remainder in pixels""" - rounded_tile = int(tile) - remainder = tile - rounded_tile - pixel_offset = int(self.tile_size * remainder) - return rounded_tile, pixel_offset - - def _fetch_image_for_bounds( - self, - x_tile_start: int, - y_tile_start: int, - x_tile_end: int, - y_tile_end: int, - zoom: int, - multithread=True, - ) -> 
np.ndarray: - """Fetches the tiles and combines them into a single image. - - If a tile cannot be fetched, a padding of expected tile size is instead added. - """ - - if multithread: - tiles = {} - with ThreadPoolExecutor( - max_workers=TILE_DOWNLOAD_CONCURRENCY - ) as exc: - for x in range(x_tile_start, x_tile_end + 1): - for y in range(y_tile_start, y_tile_end + 1): - tiles[(x, y)] = exc.submit(self._fetch_tile, x, y, zoom) - - rows = [] - for y in range(y_tile_start, y_tile_end + 1): - row = [] - for x in range(x_tile_start, x_tile_end + 1): - try: - if multithread: - row.append(tiles[(x, y)].result()) - else: - row.append(self._fetch_tile(x, y, zoom)) - except: - row.append( - np.zeros( - shape=(self.tile_size, self.tile_size, 3), - dtype=np.uint8, - ) - ) - rows.append(np.hstack(row)) - - return np.vstack(rows) - - @retry.Retry(initial=1, maximum=16, multiplier=2) - def _fetch_tile(self, x: int, y: int, z: int) -> np.ndarray: - """ - Fetches the image and returns an np array. - """ - data = requests.get(self.tile_layer.url.format(x=x, y=y, z=z)) - data.raise_for_status() - decoded = np.array(Image.open(BytesIO(data.content)))[..., :3] - if decoded.shape[:2] != (self.tile_size, self.tile_size): - logger.warning(f"Unexpected tile size {decoded.shape}.") - return decoded - - def _crop_to_bounds( - self, - image: np.ndarray, - x_px_start: int, - y_px_start: int, - x_px_end: int, - y_px_end: int, - ) -> np.ndarray: - """This function slices off the excess pixels that are outside of the bounds. - This occurs because only full tiles can be downloaded at a time. - """ - - def invert_point(pt): - # Must have at least 1 pixel for stability. - pt = max(pt, 1) - # All pixel points are relative to a single tile - # So subtracting the tile size inverts the axis - pt = pt - self.tile_size - return pt if pt != 0 else None - - x_px_end, y_px_end = invert_point(x_px_end), invert_point(y_px_end) - return image[y_px_start:y_px_end, x_px_start:x_px_end, :] - - def _validate_num_tiles( - self, - xstart: float, - ystart: float, - xend: float, - yend: float, - max_tiles: int, - ): - """Calculates the number of expected tiles we would fetch. - - If this is greater than the number of max tiles, raise an error. - """ - total_n_tiles = (yend - ystart + 1) * (xend - xstart + 1) - if total_n_tiles > max_tiles: - raise ValueError( - f"Requested zoom results in {total_n_tiles} tiles." - f"Max allowed tiles are {max_tiles}" - f"Increase max tiles or reduce zoom level." - ) - - @field_validator("zoom_levels") - def validate_zoom_levels(cls, zoom_levels): - if zoom_levels[0] > zoom_levels[1]: - raise ValueError( - f"Order of zoom levels should be min, max. Received {zoom_levels}" - ) - return zoom_levels - - class EPSGTransformer(BaseModel): """Transformer class between different EPSG's. Useful when wanting to project in different formats. 
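With the modality-specific data classes removed by this patch, a Label references its data row only through GenericDataRowData. A minimal sketch of the two construction paths the updated Label accepts (the global key is a placeholder; the dict shorthand relies on the field validator shown in the label.py hunk later in this patch):

from labelbox.data.annotation_types import GenericDataRowData, Label

# Explicit construction: identify the data row by uid, global_key,
# or external_id (placeholder value shown).
label = Label(
    data=GenericDataRowData(global_key="my-global-key"),
    annotations=[],
)

# Equivalent shorthand: the Label validator coerces a plain dict with a
# single data row key into GenericDataRowData.
label = Label(data={"global_key": "my-global-key"}, annotations=[])

assert isinstance(label.data, GenericDataRowData)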
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py b/libs/labelbox/src/labelbox/data/annotation_types/data/video.py deleted file mode 100644 index 0f40911d8..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py +++ /dev/null @@ -1,173 +0,0 @@ -import logging -import os -import urllib.request -from typing import Callable, Dict, Generator, Optional, Tuple -from typing_extensions import Literal -from uuid import uuid4 - -import cv2 -import numpy as np -from google.api_core import retry - -from .base_data import BaseData -from ..types import TypedArray - -from pydantic import ConfigDict, model_validator - -logger = logging.getLogger(__name__) - - -class VideoData(BaseData): - """ - Represents video - """ - - file_path: Optional[str] = None - url: Optional[str] = None - frames: Optional[Dict[int, TypedArray[Literal["uint8"]]]] = None - # Required for discriminating between data types - model_config = ConfigDict(extra="forbid") - - def load_frames(self, overwrite: bool = False) -> None: - """ - Loads all frames into memory at once in order to access in non-sequential order. - This will use a lot of memory, especially for longer videos - - Args: - overwrite: Replace existing frames - """ - if self.frames and not overwrite: - return - - for count, frame in self.frame_generator(): - if self.frames is None: - self.frames = {} - self.frames[count] = frame - - @property - def value(self): - return self.frame_generator() - - def frame_generator( - self, cache_frames=False, download_dir="/tmp" - ) -> Generator[Tuple[int, np.ndarray], None, None]: - """ - A generator for accessing individual frames in a video. - - Args: - cache_frames (bool): Whether or not to cache frames while iterating through the video. - download_dir (str): Directory to save the video to. Defaults to `/tmp` dir - """ - if self.frames is not None: - for idx, frame in self.frames.items(): - yield idx, frame - return - elif self.url and not self.file_path: - file_path = os.path.join(download_dir, f"{uuid4()}.mp4") - logger.info("Downloading the video locally to %s", file_path) - self.fetch_remote(file_path) - self.file_path = file_path - - vidcap = cv2.VideoCapture(self.file_path) - - success, frame = vidcap.read() - count = 0 - if cache_frames: - self.frames = {} - while success: - frame = frame[:, :, ::-1] - yield count, frame - if cache_frames: - self.frames[count] = frame - success, frame = vidcap.read() - count += 1 - - def __getitem__(self, idx: int) -> np.ndarray: - if self.frames is None: - raise ValueError( - "Cannot select by index without iterating over the entire video or loading all frames." - ) - return self.frames[idx] - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry(deadline=15.0) - def fetch_remote(self, local_path) -> None: - """ - Method for downloading data from self.url - - If url is not publicly accessible or requires another access pattern - simply override this function - - Args: - local_path: Where to save the thing too. - """ - urllib.request.urlretrieve(self.url, local_path) - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other video references. - - Args: - signer: A function that accepts bytes and returns a signed url. 
- Returns: - url for the video - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.frames is not None: - self.file_path = self.frames_to_video(self.frames) - self.url = self.create_url(signer) - else: - raise ValueError("One of url, file_path, frames must not be None.") - return self.url - - def frames_to_video( - self, frames: Dict[int, np.ndarray], fps=20, save_dir="/tmp" - ) -> str: - """ - Compresses the data by converting a set of individual frames to a single video. - - """ - file_path = os.path.join(save_dir, f"{uuid4()}.mp4") - out = None - for key in frames.keys(): - frame = frames[key] - if out is None: - out = cv2.VideoWriter( - file_path, - cv2.VideoWriter_fourcc(*"MP4V"), - fps, - frame.shape[:2], - ) - out.write(frame) - if out is None: - return - out.release() - return file_path - - @model_validator(mode="after") - def validate_data(self): - file_path = self.file_path - url = self.url - frames = self.frames - uid = self.uid - global_key = self.global_key - - if uid == file_path == frames == url == global_key is None: - raise ValueError( - "One of `file_path`, `frames`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"VideoData(file_path={self.file_path}," - f"frames={'...' if self.frames is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 7eef43f31..9d5b92bdd 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -6,7 +6,6 @@ from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) -from labelbox.data.annotation_types.data.tiled_image import TiledImageData from labelbox.schema import ontology from ...annotated_types import Cuid @@ -14,19 +13,6 @@ from .relationship import RelationshipAnnotation from .llm_prompt_response.prompt import PromptClassificationAnnotation from .classification import ClassificationAnswer -from .data import ( - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - ImageData, - TextData, - VideoData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, -) from .geometry import Mask from .metrics import ScalarMetric, ConfusionMatrixMetric from .video import VideoClassificationAnnotation @@ -35,22 +21,6 @@ from ..ontology import get_feature_schema_lookup from pydantic import BaseModel, field_validator, model_serializer -DataType = Union[ - VideoData, - ImageData, - TextData, - TiledImageData, - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, - GenericDataRowData, -] - class Label(BaseModel): """Container for holding data and annotations @@ -67,14 +37,13 @@ class Label(BaseModel): Args: uid: Optional Label Id in Labelbox - data: Data of Label, Image, Video, Text or dict with a single key uid | global_key | external_id. - Note use of classes as data is deprecated. Use GenericDataRowData or dict with a single key instead. + data: GenericDataRowData or dict with a single key uid | global_key | external_id. 
annotations: List of Annotations in the label extra: additional context """ uid: Optional[Cuid] = None - data: DataType + data: GenericDataRowData annotations: List[ Union[ ClassificationAnnotation, @@ -94,13 +63,6 @@ class Label(BaseModel): def validate_data(cls, data): if isinstance(data, Dict): return GenericDataRowData(**data) - elif isinstance(data, GenericDataRowData): - return data - else: - warnings.warn( - f"Using {type(data).__name__} class for label.data is deprecated. " - "Use a dict or an instance of GenericDataRowData instead." - ) return data def object_annotations(self) -> List[ObjectAnnotation]: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py index 2c3215265..86cf0d094 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, Union, Optional -from labelbox.data.annotation_types import ImageData, TextData, VideoData +from labelbox.data.annotation_types import GenericDataRowData from labelbox.data.mixins import ( ConfidenceMixin, CustomMetric, @@ -232,7 +232,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[TextData, ImageData], + data: Union[GenericDataRowData], message_id: str, confidence: Optional[float] = None, ) -> "NDText": @@ -264,7 +264,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: Union[GenericDataRowData], message_id: str, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, @@ -304,7 +304,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, message_id: str, confidence: Optional[float] = None, ) -> "NDRadio": @@ -427,7 +427,7 @@ def from_common( annotation: Union[ ClassificationAnnotation, VideoClassificationAnnotation ], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: classify_obj = cls.lookup_classification(annotation) if classify_obj is None: @@ -475,7 +475,7 @@ def to_common( def from_common( cls, annotation: Union[PromptClassificationAnnotation], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: return NDPromptText.from_common( str(annotation._uuid), diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 7039ae834..ffaefb4d7 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -14,7 +14,6 @@ ) from ...annotation_types.video import VideoObjectAnnotation, VideoMaskAnnotation from ...annotation_types.collection import LabelCollection, LabelGenerator -from ...annotation_types.data import DicomData, ImageData, TextData, VideoData from ...annotation_types.data.generic_data_row_data import GenericDataRowData from ...annotation_types.label import Label from ...annotation_types.ner import TextEntity, ConversationEntity @@ -214,46 +213,9 @@ def _generate_annotations( yield Label( annotations=annotations, - data=self._infer_media_type(group.data_row, annotations), + 
data=GenericDataRowData(uid=group.data_row.id, global_key=group.data_row.global_key),
             )
 
-    def _infer_media_type(
-        self,
-        data_row: DataRow,
-        annotations: List[
-            Union[
-                TextEntity,
-                ConversationEntity,
-                VideoClassificationAnnotation,
-                DICOMObjectAnnotation,
-                VideoObjectAnnotation,
-                ObjectAnnotation,
-                ClassificationAnnotation,
-                ScalarMetric,
-                ConfusionMatrixMetric,
-            ]
-        ],
-    ) -> Union[TextData, VideoData, ImageData]:
-        if len(annotations) == 0:
-            raise ValueError("Missing annotations while inferring media type")
-
-        types = {type(annotation) for annotation in annotations}
-        data = GenericDataRowData
-        if (TextEntity in types) or (ConversationEntity in types):
-            data = TextData
-        elif (
-            VideoClassificationAnnotation in types
-            or VideoObjectAnnotation in types
-        ):
-            data = VideoData
-        elif DICOMObjectAnnotation in types:
-            data = DicomData
-
-        if data_row.id:
-            return data(uid=data_row.id)
-        else:
-            return data(global_key=data_row.global_key)
-
     @staticmethod
     def _get_consecutive_frames(
         frames_indices: List[int],
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py
index b28e575cf..f8b522ab5 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py
@@ -1,6 +1,6 @@
 from typing import Optional, Union, Type
 
-from labelbox.data.annotation_types.data import ImageData, TextData
+from labelbox.data.annotation_types.data import GenericDataRowData
 from labelbox.data.serialization.ndjson.base import DataRow, NDJsonBase
 from labelbox.data.annotation_types.metrics.scalar import (
     ScalarMetric,
@@ -51,7 +51,7 @@ def to_common(self) -> ConfusionMatrixMetric:
 
     @classmethod
     def from_common(
-        cls, metric: ConfusionMatrixMetric, data: Union[TextData, ImageData]
+        cls, metric: ConfusionMatrixMetric, data: GenericDataRowData
     ) -> "NDConfusionMatrixMetric":
         return cls(
             uuid=metric.extra.get("uuid"),
@@ -83,7 +83,7 @@ def to_common(self) -> ScalarMetric:
 
     @classmethod
     def from_common(
-        cls, metric: ScalarMetric, data: Union[TextData, ImageData]
+        cls, metric: ScalarMetric, data: GenericDataRowData
     ) -> "NDScalarMetric":
         return cls(
             uuid=metric.extra.get("uuid"),
@@ -107,7 +107,7 @@ def to_common(
     def from_common(
         cls,
         annotation: Union[ScalarMetric, ConfusionMatrixMetric],
-        data: Union[TextData, ImageData],
+        data: GenericDataRowData,
     ) -> Union[NDScalarMetric, NDConfusionMatrixMetric]:
         obj = cls.lookup_object(annotation)
         return obj.from_common(annotation, data)
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py
index 74d185f45..b2dcfb5b4 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py
@@ -9,6 +9,7 @@
     MessageRankingTask,
     MessageEvaluationTaskAnnotation,
 )
+from ...annotation_types import GenericDataRowData
 
 
 class MessageTaskData(_CamelCaseMixin):
@@ -35,7 +36,7 @@ def to_common(self) -> MessageEvaluationTaskAnnotation:
     def from_common(
         cls,
         annotation: MessageEvaluationTaskAnnotation,
-        data: Any,  # Union[ImageData, TextData],
+        data: GenericDataRowData,
     ) -> "NDMessageTask":
         return cls(
             uuid=str(annotation._uuid),
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py
index 91abface6..1bcba7a89 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py
+++ 
b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Tuple, Union, Optional import base64 +from labelbox.data.annotation_types.data.raster import MaskData from labelbox.data.annotation_types.ner.conversation_entity import ( ConversationEntity, ) @@ -21,9 +22,9 @@ from PIL import Image from labelbox.data.annotation_types import feature -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData -from ...annotation_types.data import ImageData, TextData, MaskData +from ...annotation_types.data import GenericDataRowData from ...annotation_types.ner import ( DocumentEntity, DocumentTextSelection, @@ -96,7 +97,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPoint": @@ -161,7 +162,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDLine": @@ -245,7 +246,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPolygon": @@ -282,7 +283,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -329,7 +330,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -508,7 +509,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[VideoObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -545,7 +546,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[DICOMObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -601,7 +602,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDMask": @@ -706,7 +707,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDTextEntity": @@ -743,7 +744,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDDocumentEntity": @@ -778,7 +779,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = 
None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDConversationEntity": @@ -836,7 +837,7 @@ def from_common( List[List[VideoObjectAnnotation]], VideoMaskAnnotation, ], - data: Union[ImageData, TextData], + data: GenericDataRowData, ) -> Union[ NDLine, NDPoint, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py index 94c8e9879..d558ac244 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py @@ -1,7 +1,7 @@ from typing import Union from pydantic import BaseModel from .base import NDAnnotation, DataRow -from ...annotation_types.data import ImageData, TextData +from ...annotation_types.data import GenericDataRowData from ...annotation_types.relationship import RelationshipAnnotation from ...annotation_types.relationship import Relationship from .objects import NDObjectType @@ -40,7 +40,7 @@ def to_common( def from_common( cls, annotation: RelationshipAnnotation, - data: Union[ImageData, TextData], + data: GenericDataRowData, ) -> "NDRelationship": relationship = annotation.value return cls( diff --git a/libs/labelbox/src/labelbox/utils.py b/libs/labelbox/src/labelbox/utils.py index c76ce188f..dcf51be82 100644 --- a/libs/labelbox/src/labelbox/utils.py +++ b/libs/labelbox/src/labelbox/utils.py @@ -87,8 +87,8 @@ class _NoCoercionMixin: when serializing the object. Example: - class ConversationData(BaseData, _NoCoercionMixin): - class_name: Literal["ConversationData"] = "ConversationData" + class GenericDataRowData(BaseData, _NoCoercionMixin): + class_name: Literal["GenericDataRowData"] = "GenericDataRowData" """ diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 1c9cd669e..c16f61b64 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -7,19 +7,21 @@ from labelbox.data.annotation_types import ( LabelGenerator, ObjectAnnotation, - ImageData, - MaskData, Line, Mask, Point, Label, + GenericDataRowData, + MaskData, ) from labelbox import OntologyBuilder, Tool @pytest.fixture def list_of_labels(): - return [Label(data=ImageData(url="http://someurl")) for _ in range(5)] + return [ + Label(data=GenericDataRowData(url="http://someurl")) for _ in range(5) + ] @pytest.fixture @@ -73,7 +75,7 @@ def test_conversion(list_of_labels): def test_adding_schema_ids(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=GenericDataRowData(uid="123456"), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -93,7 +95,7 @@ def test_adding_schema_ids(): def test_adding_urls(signer): label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData("12345"), annotations=[], ) uuid = str(uuid4()) @@ -106,7 +108,7 @@ def test_adding_urls(signer): def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData("12345"), annotations=[], ) uuid = str(uuid4()) @@ -121,7 +123,7 @@ def test_adding_to_dataset(signer): def test_adding_to_masks(signer): label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData("12345"), annotations=[ 
ObjectAnnotation( name="1234", diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 59f568c75..fb78916f4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -4,7 +4,7 @@ ClassificationAnswer, Radio, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -13,9 +13,8 @@ def test_serialization_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -41,9 +40,8 @@ def test_serialization_min(): def test_serialization_with_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -131,9 +129,8 @@ def test_serialization_with_classification(): def test_serialization_with_classification_double_nested(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -224,9 +221,8 @@ def test_serialization_with_classification_double_nested(): def test_serialization_with_classification_double_nested_2(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index d67acb9c3..4d615658c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -11,7 +11,6 @@ Mask, Label, ObjectAnnotation, - ImageData, MaskData, ) from labelbox.types import Rectangle, Polygon, Point @@ -262,7 +261,7 @@ def test_mask_from_arr(): ), ) ], - data=ImageData(uid="0" * 25), + data=GenericDataRowData(uid="0" * 25), ) res = next(NDJsonConverter.serialize([label])) res.pop("uuid") diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 4458e335c..ec57f0528 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -3,7 +3,7 @@ ClassificationAnswer, ) from labelbox.data.annotation_types.classification.classification import Radio -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -12,9 +12,8 @@ def test_serialization_with_radio_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -43,9 +42,8 @@ def test_serialization_with_radio_min(): def test_serialization_with_radio_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - 
data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 21db389cb..28eba07bd 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -2,7 +2,7 @@ from labelbox.data.annotation_types.classification.classification import ( Text, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -11,9 +11,8 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( From e31d118fc9b7cfe666917b565298b67240d513ae Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:46:16 -0500 Subject: [PATCH 22/35] Fixed video --- libs/labelbox/tests/unit/test_label_data_type.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/libs/labelbox/tests/unit/test_label_data_type.py b/libs/labelbox/tests/unit/test_label_data_type.py index 7bc32e37c..662fa5a5a 100644 --- a/libs/labelbox/tests/unit/test_label_data_type.py +++ b/libs/labelbox/tests/unit/test_label_data_type.py @@ -1,11 +1,7 @@ -from email import message import pytest -from pydantic import ValidationError - from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) -from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.label import Label @@ -42,9 +38,9 @@ def test_video_data_type(): "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", } with pytest.warns(UserWarning, match="Use a dict"): - label = Label(data=VideoData(**data)) + label = Label(data=GenericDataRowData(**data)) data = label.data - assert isinstance(data, VideoData) + assert isinstance(data, GenericDataRowData) assert ( data.global_key == "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr" From 1a8189a254423dfa66964a2f2b1d57bd03061ed7 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:51:12 -0500 Subject: [PATCH 23/35] Removed data type test --- libs/labelbox/tests/unit/test_label_data_type.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/libs/labelbox/tests/unit/test_label_data_type.py b/libs/labelbox/tests/unit/test_label_data_type.py index 662fa5a5a..611324f78 100644 --- a/libs/labelbox/tests/unit/test_label_data_type.py +++ b/libs/labelbox/tests/unit/test_label_data_type.py @@ -33,20 +33,6 @@ def test_generic_data_type_validations(): Label(data=data) -def test_video_data_type(): - data = { - "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", - } - with pytest.warns(UserWarning, match="Use a dict"): - label = Label(data=GenericDataRowData(**data)) - data = label.data - assert isinstance(data, GenericDataRowData) - assert ( - data.global_key - == 
"https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr" - ) - - def test_generic_data_row(): data = { "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", From 62f5fbdbb9d58be60f8327784f3e1da63d2c6005 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:42:52 -0500 Subject: [PATCH 24/35] Made fix --- .../labelbox/tests/data/annotation_types/test_collection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index c16f61b64..26b91fc61 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -95,7 +95,7 @@ def test_adding_schema_ids(): def test_adding_urls(signer): label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[], ) uuid = str(uuid4()) @@ -108,7 +108,7 @@ def test_adding_urls(signer): def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[], ) uuid = str(uuid4()) @@ -123,7 +123,7 @@ def test_adding_to_dataset(signer): def test_adding_to_masks(signer): label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[ ObjectAnnotation( name="1234", From 6c11e746cdf1542cc2fa6155ccc0e7efcfcdf8c7 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:53:19 -0500 Subject: [PATCH 25/35] Fix list of labels --- libs/labelbox/tests/data/annotation_types/test_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 26b91fc61..e7e51e951 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -20,7 +20,7 @@ @pytest.fixture def list_of_labels(): return [ - Label(data=GenericDataRowData(url="http://someurl")) for _ in range(5) + Label(data=GenericDataRowData(uid="http://someurl")) for _ in range(5) ] From 1ef53cc7608e93eb062cb6e833cc3e9aa822827d Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:08:03 -0500 Subject: [PATCH 26/35] Removed add url --- .../data/annotation_types/collection.py | 20 ------------------- .../labelbox/data/annotation_types/label.py | 13 ------------ .../data/annotation_types/test_collection.py | 13 ------------ 3 files changed, 46 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index d90204309..9eb1fe53e 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -40,26 +40,6 @@ def _assign_ids(label: Label): self._fns["assign_feature_schema_ids"] = _assign_ids return self - def add_url_to_data( - self, signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. 
- Returns: - LabelGenerator that signs urls as data is accessed - """ - - def _add_url_to_data(label: Label): - label.add_url_to_data(signer) - return label - - self._fns["add_url_to_data"] = _add_url_to_data - return self - def add_to_dataset( self, dataset: "Entity.Dataset", signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 9d5b92bdd..a18460bc1 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -90,19 +90,6 @@ def frame_annotations( frame_dict[annotation.frame].append(annotation) return frame_dict - def add_url_to_data(self, signer) -> "Label": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - Label with updated references to new data url - """ - self.data.create_url(signer) - return self - def add_url_to_masks(self, signer) -> "Label": """ Creates signed urls for all masks in the Label. diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index e7e51e951..df0d9b007 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -93,18 +93,6 @@ def test_adding_schema_ids(): assert next(generator).annotations[0].feature_schema_id == feature_schema_id -def test_adding_urls(signer): - label = Label( - data=GenericDataRowData(uid="12345"), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_url_to_data(signer(uuid)) - assert label.data.url != uuid - assert next(generator).data.url == uuid - assert label.data.url == uuid - - def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( @@ -113,7 +101,6 @@ def test_adding_to_dataset(signer): ) uuid = str(uuid4()) generator = LabelGenerator([label]).add_to_dataset(dataset, signer(uuid)) - assert label.data.url != uuid generated_label = next(generator) assert generated_label.data.url == uuid assert generated_label.data.external_id is not None From 159e22711779ecc0a5b411b3f8da49cb6c0845eb Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:35:51 -0500 Subject: [PATCH 27/35] Removed rest of tests --- .../data/annotation_types/collection.py | 21 ----------------- .../data/annotation_types/data/test_raster.py | 13 +++++------ .../data/annotation_types/test_collection.py | 15 ------------ .../data/annotation_types/test_metrics.py | 15 ++++++++---- .../data/annotation_types/test_tiled_image.py | 23 ------------------- .../serialization/ndjson/test_conversation.py | 10 ++++---- .../data/serialization/ndjson/test_dicom.py | 14 ++++++----- .../serialization/ndjson/test_document.py | 2 +- .../serialization/ndjson/test_free_text.py | 6 ++--- .../data/serialization/ndjson/test_video.py | 15 ++++++------ 10 files changed, 41 insertions(+), 93 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index 9eb1fe53e..2e76176a8 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -40,27 +40,6 @@ def _assign_ids(label: Label): 
self._fns["assign_feature_schema_ids"] = _assign_ids return self - def add_to_dataset( - self, dataset: "Entity.Dataset", signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates data rows from each labels data object and attaches the data to the given dataset. - Updates the label's data object to have the same external_id and uid as the data row. - - Args: - dataset: labelbox dataset object to add the new data row to - signer: A function that accepts bytes and returns a signed url. - Returns: - LabelGenerator that updates references to the new data rows as data is accessed - """ - - def _add_to_dataset(label: Label): - label.create_data_row(dataset, signer) - return label - - self._fns["assign_datarow_ids"] = _add_to_dataset - return self - def add_url_to_masks( self, signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/tests/data/annotation_types/data/test_raster.py b/libs/labelbox/tests/data/annotation_types/data/test_raster.py index 6bc8f2bbf..304ed3e95 100644 --- a/libs/labelbox/tests/data/annotation_types/data/test_raster.py +++ b/libs/labelbox/tests/data/annotation_types/data/test_raster.py @@ -5,26 +5,26 @@ import pytest from PIL import Image -from labelbox.data.annotation_types.data import ImageData +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from pydantic import ValidationError def test_validate_schema(): with pytest.raises(ValidationError): - data = ImageData() + GenericDataRowData() def test_im_bytes(): data = (np.random.random((32, 32, 3)) * 255).astype(np.uint8) im_bytes = BytesIO() Image.fromarray(data).save(im_bytes, format="PNG") - raster_data = ImageData(im_bytes=im_bytes.getvalue()) + raster_data = MaskData(im_bytes=im_bytes.getvalue()) data_ = raster_data.value assert np.all(data == data_) def test_im_url(): - raster_data = ImageData(url="https://picsum.photos/id/829/200/300") + raster_data = GenericDataRowData(url="https://picsum.photos/id/829/200/300") data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -32,7 +32,7 @@ def test_im_url(): def test_im_path(): img_path = "/tmp/img.jpg" urllib.request.urlretrieve("https://picsum.photos/id/829/200/300", img_path) - raster_data = ImageData(file_path=img_path) + raster_data = GenericDataRowData(file_path=img_path) data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -42,8 +42,7 @@ def test_ref(): uid = "uid" metadata = [] media_attributes = {} - data = ImageData( - im_bytes=b"", + data = GenericDataRowData( external_id=external_id, uid=uid, metadata=metadata, diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index df0d9b007..f818b94ff 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -93,21 +93,6 @@ def test_adding_schema_ids(): assert next(generator).annotations[0].feature_schema_id == feature_schema_id -def test_adding_to_dataset(signer): - dataset = FakeDataset() - label = Label( - data=GenericDataRowData(uid="12345"), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_to_dataset(dataset, signer(uuid)) - generated_label = next(generator) - assert generated_label.data.url == uuid - assert generated_label.data.external_id is not None - assert generated_label.data.uid == dataset.uid - assert label.data.url == uuid - - def test_adding_to_masks(signer): label = Label( data=GenericDataRowData(uid="12345"), diff --git 
a/libs/labelbox/tests/data/annotation_types/test_metrics.py b/libs/labelbox/tests/data/annotation_types/test_metrics.py index 94c9521a5..4e9355573 100644 --- a/libs/labelbox/tests/data/annotation_types/test_metrics.py +++ b/libs/labelbox/tests/data/annotation_types/test_metrics.py @@ -8,7 +8,11 @@ ConfusionMatrixMetric, ScalarMetric, ) -from labelbox.data.annotation_types import ScalarMetric, Label, ImageData +from labelbox.data.annotation_types import ( + ScalarMetric, + Label, + GenericDataRowData, +) from labelbox.data.annotation_types.metrics.scalar import RESERVED_METRIC_NAMES from pydantic import ValidationError @@ -19,7 +23,8 @@ def test_legacy_scalar_metric(): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -72,7 +77,8 @@ def test_custom_scalar_metric(feature_name, subclass_name, aggregation, value): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -134,7 +140,8 @@ def test_custom_confusison_matrix_metric( assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { diff --git a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py index 46f2383d6..9b96c9445 100644 --- a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py +++ b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py @@ -6,8 +6,6 @@ from labelbox.data.annotation_types.data.tiled_image import ( EPSG, TiledBounds, - TileLayer, - TiledImageData, EPSGTransformer, ) from pydantic import ValidationError @@ -37,27 +35,6 @@ def test_tiled_bounds_same(epsg): ) -def test_create_tiled_image_data(): - bounds_points = [Point(x=0, y=0), Point(x=5, y=5)] - url = ( - "https://labelbox.s3-us-west-2.amazonaws.com/pathology/{z}/{x}/{y}.png" - ) - zoom_levels = (1, 10) - - tile_layer = TileLayer(url=url, name="slippy map tile") - tile_bounds = TiledBounds(epsg=EPSG.EPSG4326, bounds=bounds_points) - tiled_image_data = TiledImageData( - tile_layer=tile_layer, - tile_bounds=tile_bounds, - zoom_levels=zoom_levels, - version=2, - ) - assert isinstance(tiled_image_data, TiledImageData) - assert tiled_image_data.tile_bounds.bounds == bounds_points - assert tiled_image_data.tile_layer.url == url - assert tiled_image_data.zoom_levels == zoom_levels - - def test_epsg_point_projections(): zoom = 4 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 561f9ce86..5aa7285e2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -19,7 +19,7 @@ radio_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="radio", @@ -48,7 +48,7 @@ checklist_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ 
lb_types.ClassificationAnnotation( name="checklist", @@ -78,7 +78,7 @@ ] free_text_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -164,7 +164,7 @@ def test_conversation_entity_import_without_confidence(): def test_benchmark_reference_label_flag_enabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -181,7 +181,7 @@ def test_benchmark_reference_label_flag_enabled(): def test_benchmark_reference_label_flag_disabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 762891aa2..6a00fa871 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -31,7 +31,7 @@ ] polyline_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), + data=lb_types.GenericDataRowData(uid="test-uid"), annotations=dicom_polyline_annotations, ) @@ -58,7 +58,7 @@ } polyline_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=dicom_polyline_annotations, ) @@ -109,11 +109,12 @@ } video_mask_label = lb_types.Label( - data=lb_types.VideoData(uid="test-uid"), annotations=[video_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[video_mask_annotation], ) video_mask_label_with_global_key = lb_types.Label( - data=lb_types.VideoData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[video_mask_annotation], ) """ @@ -128,11 +129,12 @@ ) dicom_mask_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), annotations=[dicom_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[dicom_mask_annotation], ) dicom_mask_label_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[dicom_mask_annotation], ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index a0897ad9f..fcdf4368b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -26,7 +26,7 @@ ) bbox_labels = [ lb_types.Label( - data=lb_types.DocumentData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[bbox_annotation], ) ] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 349be13a8..7b03a8447 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -5,7 +5,7 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.text import TextData +from 
labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -14,7 +14,7 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), @@ -38,7 +38,7 @@ def test_serialization(): def test_nested_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 4fba5c2ca..6c14343a4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -6,11 +6,10 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.geometry.line import Line from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.geometry.rectangle import Rectangle -from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.video import ( @@ -28,7 +27,7 @@ def test_video(): labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( feature_schema_id="ckrb1sfjx099a0y914hl319ie", @@ -304,7 +303,7 @@ def test_video_name_only(): data = json.load(file) labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( name="question 1", @@ -574,7 +573,7 @@ def test_video_name_only(): def test_video_classification_global_subclassifications(): label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=[ @@ -790,7 +789,7 @@ def test_video_classification_nesting_bbox(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -940,7 +939,7 @@ def test_video_classification_point(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -1108,7 +1107,7 @@ def test_video_classification_frameline(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, From 5f625c8ba890e73e05764916f3a62b022969d913 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:15:43 -0500 Subject: [PATCH 28/35] Fix tests --- .../tests/data/annotation_types/test_label.py | 20 +++++++++++-------- .../test_export_video_streamable.py | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_label.py b/libs/labelbox/tests/data/annotation_types/test_label.py index 5bdfb6bde..8439837ed 100644 --- a/libs/labelbox/tests/data/annotation_types/test_label.py +++ b/libs/labelbox/tests/data/annotation_types/test_label.py @@ -17,7 +17,7 @@ ObjectAnnotation, Point, Line, - ImageData, + MaskData, Label, ) import pytest @@ -26,7 +26,9 @@ def 
test_schema_assignment_geometry(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -51,7 +53,7 @@ def test_schema_assignment_classification(): option_name = "my_option" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ClassificationAnnotation( value=Radio(answer=ClassificationAnswer(name=option_name)), @@ -102,7 +104,7 @@ def test_schema_assignment_subclass(): value=Radio(answer=ClassificationAnswer(name=option_name)), ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -167,7 +169,9 @@ def test_highly_nested(): ], ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -230,7 +234,7 @@ def test_highly_nested(): def test_schema_assignment_confidence(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line( @@ -252,10 +256,10 @@ def test_initialize_label_no_coercion(): value=lb_types.ConversationEntity(start=0, end=8, message_id="4"), ) label = Label( - data=lb_types.ConversationData(global_key=global_key), + data=lb_types.GenericDataRowData(global_key=global_key), annotations=[ner_annotation], ) - assert isinstance(label.data, lb_types.ConversationData) + assert isinstance(label.data, lb_types.GenericDataRowData) assert label.data.global_key == global_key diff --git a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py index 115194a58..28ef6e0cf 100644 --- a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py +++ b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py @@ -4,7 +4,7 @@ import labelbox as lb import labelbox.types as lb_types -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.schema.annotation_import import AnnotationImportState from labelbox.schema.export_task import ExportTask, StreamType @@ -41,7 +41,7 @@ def test_export( for data_row_uid in data_row_uids: labels = [ lb_types.Label( - data=VideoData(uid=data_row_uid), + data=GenericDataRowData(uid=data_row_uid), annotations=bbox_video_annotation_objects, ) ] From 90c1a1950dfd45bceff32b1b39c12c91f07e6a39 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:16:05 -0500 Subject: [PATCH 29/35] Finish PR --- libs/labelbox/src/labelbox/data/annotation_types/label.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index a18460bc1..8ae05f898 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ 
b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -3,9 +3,7 @@ import warnings import labelbox -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from labelbox.schema import ontology from ...annotated_types import Cuid @@ -19,7 +17,7 @@ from .video import VideoObjectAnnotation, VideoMaskAnnotation from .mmc import MessageEvaluationTaskAnnotation from ..ontology import get_feature_schema_lookup -from pydantic import BaseModel, field_validator, model_serializer +from pydantic import BaseModel, field_validator class Label(BaseModel): @@ -43,7 +41,7 @@ class Label(BaseModel): """ uid: Optional[Cuid] = None - data: GenericDataRowData + data: Union[GenericDataRowData, MaskData] annotations: List[ Union[ ClassificationAnnotation, From f91a229bf7d114c407d5a80e3fb24b14714a4d44 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:44:14 -0500 Subject: [PATCH 30/35] Added back in tile data since some of its parameters are required --- .../data/annotation_types/data/tiled_image.py | 294 ++++++++++++++++++ .../data/annotation_types/test_tiled_image.py | 23 ++ 2 files changed, 317 insertions(+) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py index cdb7f4127..adb8db549 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py @@ -88,6 +88,300 @@ def validate_bounds_lat_lng(self): return self +class TileLayer(BaseModel): + """Url that contains the tile layer. Must be in the format: + + https://c.tile.openstreetmap.org/{z}/{x}/{y}.png + + >>> layer = TileLayer( + url="https://c.tile.openstreetmap.org/{z}/{x}/{y}.png", + name="slippy map tile" + ) + """ + + url: str + name: Optional[str] = "default" + + def asdict(self) -> Dict[str, str]: + return {"tileLayerUrl": self.url, "name": self.name} + + @field_validator("url") + def validate_url(cls, url): + xyz_format = "/{z}/{x}/{y}" + if xyz_format not in url: + raise ValueError(f"{url} needs to contain {xyz_format}") + return url + + +class TiledImageData(BaseData): + """Represents tiled imagery + + If specified version is 2, converts bounds from [lng,lat] to [lat,lng] + + Requires the following args: + tile_layer: TileLayer + tile_bounds: TiledBounds + zoom_levels: List[int] + Optional args: + max_native_zoom: int = None + tile_size: Optional[int] + version: int = 2 + alternative_layers: List[TileLayer] + + >>> tiled_image_data = TiledImageData(tile_layer=TileLayer, + tile_bounds=TiledBounds, + zoom_levels=[1, 12]) + """ + + tile_layer: TileLayer + tile_bounds: TiledBounds + alternative_layers: List[TileLayer] = [] + zoom_levels: Tuple[int, int] + max_native_zoom: Optional[int] = None + tile_size: Optional[int] = DEFAULT_TMS_TILE_SIZE + version: Optional[int] = 2 + multithread: bool = True + + def __post_init__(self) -> None: + if self.max_native_zoom is None: + self.max_native_zoom = self.zoom_levels[0] + + def asdict(self) -> Dict[str, str]: + return { + "tileLayerUrl": self.tile_layer.url, + "bounds": [ + [self.tile_bounds.bounds[0].x, self.tile_bounds.bounds[0].y], + [self.tile_bounds.bounds[1].x, self.tile_bounds.bounds[1].y], + ], + "minZoom": self.zoom_levels[0], + "maxZoom": self.zoom_levels[1], + "maxNativeZoom": self.max_native_zoom, + "epsg": 
self.tile_bounds.epsg.name, + "tileSize": self.tile_size, + "alternativeLayers": [ + layer.asdict() for layer in self.alternative_layers + ], + "version": self.version, + } + + def raster_data( + self, zoom: int = 0, max_tiles: int = 32, multithread=True + ) -> RasterData: + """Converts the tiled image asset into a RasterData object containing an + np.ndarray. + + Uses the minimum zoom provided to render the image. + """ + if self.tile_bounds.epsg == EPSG.SIMPLEPIXEL: + xstart, ystart, xend, yend = self._get_simple_image_params(zoom) + elif self.tile_bounds.epsg == EPSG.EPSG4326: + xstart, ystart, xend, yend = self._get_3857_image_params( + zoom, self.tile_bounds + ) + elif self.tile_bounds.epsg == EPSG.EPSG3857: + # transform to 4326 + transformer = EPSGTransformer.create_geo_to_geo_transformer( + EPSG.EPSG3857, EPSG.EPSG4326 + ) + transforming_bounds = [ + transformer(self.tile_bounds.bounds[0]), + transformer(self.tile_bounds.bounds[1]), + ] + xstart, ystart, xend, yend = self._get_3857_image_params( + zoom, transforming_bounds + ) + else: + raise ValueError(f"Unsupported epsg found: {self.tile_bounds.epsg}") + + self._validate_num_tiles(xstart, ystart, xend, yend, max_tiles) + + rounded_tiles, pixel_offsets = list( + zip( + *[ + self._tile_to_pixel(pt) + for pt in [xstart, ystart, xend, yend] + ] + ) + ) + + image = self._fetch_image_for_bounds(*rounded_tiles, zoom, multithread) + arr = self._crop_to_bounds(image, *pixel_offsets) + return RasterData(arr=arr) + + @property + def value(self) -> np.ndarray: + """Returns the value of a generated RasterData object.""" + return self.raster_data( + self.zoom_levels[0], multithread=self.multithread + ).value + + def _get_simple_image_params( + self, zoom + ) -> Tuple[float, float, float, float]: + """Computes the x and y tile bounds for fetching an image that + captures the entire labeling region (TiledData.bounds) given a specific zoom + + Simple has different order of x / y than lat / lng because of how leaflet behaves + leaflet reports all points as pixel locations at a zoom of 0 + """ + xend, xstart, yend, ystart = ( + self.tile_bounds.bounds[1].x, + self.tile_bounds.bounds[0].x, + self.tile_bounds.bounds[1].y, + self.tile_bounds.bounds[0].y, + ) + return ( + *[ + x * (2 ** (zoom)) / self.tile_size + for x in [xstart, ystart, xend, yend] + ], + ) + + def _get_3857_image_params( + self, zoom: int, bounds: TiledBounds + ) -> Tuple[float, float, float, float]: + """Computes the x and y tile bounds for fetching an image that + captures the entire labeling region (TiledData.bounds) given a specific zoom + """ + lat_start, lat_end = bounds.bounds[1].y, bounds.bounds[0].y + lng_start, lng_end = bounds.bounds[1].x, bounds.bounds[0].x + + # Convert to zoom 0 tile coordinates + xstart, ystart = self._latlng_to_tile(lat_start, lng_start) + xend, yend = self._latlng_to_tile(lat_end, lng_end) + + # Make sure that the tiles are increasing in order + xstart, xend = min(xstart, xend), max(xstart, xend) + ystart, yend = min(ystart, yend), max(ystart, yend) + return (*[pt * 2.0**zoom for pt in [xstart, ystart, xend, yend]],) + + def _latlng_to_tile( + self, lat: float, lng: float, zoom=0 + ) -> Tuple[float, float]: + """Converts lat/lng to 3857 tile coordinates + Formula found here: + https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#lon.2Flat_to_tile_numbers_2 + """ + scale = 2**zoom + lat_rad = math.radians(lat) + x = (lng + 180.0) / 360.0 * scale + y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale + return x, y + + def 
_tile_to_pixel(self, tile: float) -> Tuple[int, int]: + """Rounds a tile coordinate and reports the remainder in pixels""" + rounded_tile = int(tile) + remainder = tile - rounded_tile + pixel_offset = int(self.tile_size * remainder) + return rounded_tile, pixel_offset + + def _fetch_image_for_bounds( + self, + x_tile_start: int, + y_tile_start: int, + x_tile_end: int, + y_tile_end: int, + zoom: int, + multithread=True, + ) -> np.ndarray: + """Fetches the tiles and combines them into a single image. + + If a tile cannot be fetched, a padding of expected tile size is instead added. + """ + + if multithread: + tiles = {} + with ThreadPoolExecutor( + max_workers=TILE_DOWNLOAD_CONCURRENCY + ) as exc: + for x in range(x_tile_start, x_tile_end + 1): + for y in range(y_tile_start, y_tile_end + 1): + tiles[(x, y)] = exc.submit(self._fetch_tile, x, y, zoom) + + rows = [] + for y in range(y_tile_start, y_tile_end + 1): + row = [] + for x in range(x_tile_start, x_tile_end + 1): + try: + if multithread: + row.append(tiles[(x, y)].result()) + else: + row.append(self._fetch_tile(x, y, zoom)) + except: + row.append( + np.zeros( + shape=(self.tile_size, self.tile_size, 3), + dtype=np.uint8, + ) + ) + rows.append(np.hstack(row)) + + return np.vstack(rows) + + @retry.Retry(initial=1, maximum=16, multiplier=2) + def _fetch_tile(self, x: int, y: int, z: int) -> np.ndarray: + """ + Fetches the image and returns an np array. + """ + data = requests.get(self.tile_layer.url.format(x=x, y=y, z=z)) + data.raise_for_status() + decoded = np.array(Image.open(BytesIO(data.content)))[..., :3] + if decoded.shape[:2] != (self.tile_size, self.tile_size): + logger.warning(f"Unexpected tile size {decoded.shape}.") + return decoded + + def _crop_to_bounds( + self, + image: np.ndarray, + x_px_start: int, + y_px_start: int, + x_px_end: int, + y_px_end: int, + ) -> np.ndarray: + """This function slices off the excess pixels that are outside of the bounds. + This occurs because only full tiles can be downloaded at a time. + """ + + def invert_point(pt): + # Must have at least 1 pixel for stability. + pt = max(pt, 1) + # All pixel points are relative to a single tile + # So subtracting the tile size inverts the axis + pt = pt - self.tile_size + return pt if pt != 0 else None + + x_px_end, y_px_end = invert_point(x_px_end), invert_point(y_px_end) + return image[y_px_start:y_px_end, x_px_start:x_px_end, :] + + def _validate_num_tiles( + self, + xstart: float, + ystart: float, + xend: float, + yend: float, + max_tiles: int, + ): + """Calculates the number of expected tiles we would fetch. + + If this is greater than the number of max tiles, raise an error. + """ + total_n_tiles = (yend - ystart + 1) * (xend - xstart + 1) + if total_n_tiles > max_tiles: + raise ValueError( + f"Requested zoom results in {total_n_tiles} tiles." + f"Max allowed tiles are {max_tiles}" + f"Increase max tiles or reduce zoom level." + ) + + @field_validator("zoom_levels") + def validate_zoom_levels(cls, zoom_levels): + if zoom_levels[0] > zoom_levels[1]: + raise ValueError( + f"Order of zoom levels should be min, max. Received {zoom_levels}" + ) + return zoom_levels + + class EPSGTransformer(BaseModel): """Transformer class between different EPSG's. Useful when wanting to project in different formats. 
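
The restored TiledImageData keeps tile_layer, tile_bounds, and zoom_levels as required constructor arguments, which is why it cannot be collapsed into GenericDataRowData. A minimal construction sketch based only on the fields and methods in the hunk above (the tile URL and bounds are placeholder values, and rendering via raster_data() additionally needs a reachable XYZ tile server):

    from labelbox.data.annotation_types.data.tiled_image import (
        EPSG,
        TiledBounds,
        TiledImageData,
        TileLayer,
    )
    from labelbox.types import Point

    # Any XYZ template containing /{z}/{x}/{y} satisfies TileLayer.validate_url.
    layer = TileLayer(url="https://example.com/tiles/{z}/{x}/{y}.png", name="base")
    bounds = TiledBounds(
        epsg=EPSG.EPSG4326, bounds=[Point(x=0, y=0), Point(x=5, y=5)]
    )
    tiled = TiledImageData(
        tile_layer=layer, tile_bounds=bounds, zoom_levels=(1, 10)
    )

    # raster_data() stitches tiles at the requested zoom into a RasterData;
    # max_tiles bounds the download via _validate_num_tiles before any HTTP
    # requests are made.
    arr = tiled.raster_data(zoom=1, max_tiles=32).value
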
diff --git a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py index 9b96c9445..46f2383d6 100644 --- a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py +++ b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py @@ -6,6 +6,8 @@ from labelbox.data.annotation_types.data.tiled_image import ( EPSG, TiledBounds, + TileLayer, + TiledImageData, EPSGTransformer, ) from pydantic import ValidationError @@ -35,6 +37,27 @@ def test_tiled_bounds_same(epsg): ) +def test_create_tiled_image_data(): + bounds_points = [Point(x=0, y=0), Point(x=5, y=5)] + url = ( + "https://labelbox.s3-us-west-2.amazonaws.com/pathology/{z}/{x}/{y}.png" + ) + zoom_levels = (1, 10) + + tile_layer = TileLayer(url=url, name="slippy map tile") + tile_bounds = TiledBounds(epsg=EPSG.EPSG4326, bounds=bounds_points) + tiled_image_data = TiledImageData( + tile_layer=tile_layer, + tile_bounds=tile_bounds, + zoom_levels=zoom_levels, + version=2, + ) + assert isinstance(tiled_image_data, TiledImageData) + assert tiled_image_data.tile_bounds.bounds == bounds_points + assert tiled_image_data.tile_layer.url == url + assert tiled_image_data.zoom_levels == zoom_levels + + def test_epsg_point_projections(): zoom = 4 From fdabc94b4345f13fb454db1420229f99adc9cb40 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:51:42 -0500 Subject: [PATCH 31/35] Added tile back to __init__.py --- libs/labelbox/src/labelbox/data/annotation_types/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py index 84d6d65a5..1a78127e1 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py @@ -48,6 +48,8 @@ from .data.tiled_image import EPSG from .data.tiled_image import EPSGTransformer from .data.tiled_image import TiledBounds +from .data.tiled_image import TiledImageData +from .data.tiled_image import TileLayer from .llm_prompt_response.prompt import PromptText from .llm_prompt_response.prompt import PromptClassificationAnnotation From 5bb3c97b59cd87f776a9b8657584d9e4f9944350 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 12:50:08 -0500 Subject: [PATCH 32/35] Fixed import --- libs/labelbox/src/labelbox/data/annotation_types/data/raster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py index 3a4e8bb6e..fc9acd50f 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py @@ -12,7 +12,6 @@ from typing_extensions import Literal from pydantic import BaseModel, model_validator, ConfigDict -from labelbox.exceptions import InternalServerError from ..types import TypedArray From 4888346a129f3b172bca5dde6f2ff548e7002760 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:50:16 -0500 Subject: [PATCH 33/35] Removed some data tests --- .../data/annotation_types/data/test_raster.py | 16 +--- .../data/annotation_types/data/test_text.py | 55 -------------- .../data/annotation_types/data/test_video.py | 73 ------------------- 3 files changed, 4 insertions(+), 140 deletions(-) delete 
mode 100644 libs/labelbox/tests/data/annotation_types/data/test_text.py delete mode 100644 libs/labelbox/tests/data/annotation_types/data/test_video.py diff --git a/libs/labelbox/tests/data/annotation_types/data/test_raster.py b/libs/labelbox/tests/data/annotation_types/data/test_raster.py index 304ed3e95..209419aed 100644 --- a/libs/labelbox/tests/data/annotation_types/data/test_raster.py +++ b/libs/labelbox/tests/data/annotation_types/data/test_raster.py @@ -11,7 +11,7 @@ def test_validate_schema(): with pytest.raises(ValidationError): - GenericDataRowData() + MaskData() def test_im_bytes(): @@ -24,15 +24,9 @@ def test_im_bytes(): def test_im_url(): - raster_data = GenericDataRowData(url="https://picsum.photos/id/829/200/300") - data_ = raster_data.value - assert data_.shape == (300, 200, 3) - - -def test_im_path(): - img_path = "/tmp/img.jpg" - urllib.request.urlretrieve("https://picsum.photos/id/829/200/300", img_path) - raster_data = GenericDataRowData(file_path=img_path) + raster_data = MaskData( + uid="test", url="https://picsum.photos/id/829/200/300" + ) data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -43,12 +37,10 @@ def test_ref(): metadata = [] media_attributes = {} data = GenericDataRowData( - external_id=external_id, uid=uid, metadata=metadata, media_attributes=media_attributes, ) - assert data.external_id == external_id assert data.uid == uid assert data.media_attributes == media_attributes assert data.metadata == metadata diff --git a/libs/labelbox/tests/data/annotation_types/data/test_text.py b/libs/labelbox/tests/data/annotation_types/data/test_text.py deleted file mode 100644 index 865f93e65..000000000 --- a/libs/labelbox/tests/data/annotation_types/data/test_text.py +++ /dev/null @@ -1,55 +0,0 @@ -import os - -import pytest - -from labelbox.data.annotation_types import TextData -from pydantic import ValidationError - - -def test_validate_schema(): - with pytest.raises(ValidationError): - data = TextData() - - -def test_text(): - text = "hello world" - metadata = [] - media_attributes = {} - text_data = TextData( - text=text, metadata=metadata, media_attributes=media_attributes - ) - assert text_data.text == text - - -def test_url(): - url = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/sample3.txt" - text_data = TextData(url=url) - text = text_data.value - assert len(text) == 3541 - - -def test_file(tmpdir): - content = "foo bar baz" - file = "hello.txt" - dir = tmpdir.mkdir("data") - dir.join(file).write(content) - text_data = TextData(file_path=os.path.join(dir.strpath, file)) - assert len(text_data.value) == len(content) - - -def test_ref(): - external_id = "external_id" - uid = "uid" - metadata = [] - media_attributes = {} - data = TextData( - text="hello world", - external_id=external_id, - uid=uid, - metadata=metadata, - media_attributes=media_attributes, - ) - assert data.external_id == external_id - assert data.uid == uid - assert data.media_attributes == media_attributes - assert data.metadata == metadata diff --git a/libs/labelbox/tests/data/annotation_types/data/test_video.py b/libs/labelbox/tests/data/annotation_types/data/test_video.py deleted file mode 100644 index 5fd77c2c8..000000000 --- a/libs/labelbox/tests/data/annotation_types/data/test_video.py +++ /dev/null @@ -1,73 +0,0 @@ -import numpy as np -import pytest - -from labelbox.data.annotation_types import VideoData -from pydantic import ValidationError - - -def test_validate_schema(): - with pytest.raises(ValidationError): - data = VideoData() - - 
-def test_frames(): - data = { - x: (np.random.random((32, 32, 3)) * 255).astype(np.uint8) - for x in range(5) - } - video_data = VideoData(frames=data) - for idx, frame in video_data.frame_generator(): - assert idx in data - assert np.all(frame == data[idx]) - - -def test_file_path(): - path = "tests/integration/media/cat.mp4" - raster_data = VideoData(file_path=path) - - with pytest.raises(ValueError): - raster_data[0] - - raster_data.load_frames() - raster_data[0] - - frame_indices = list(raster_data.frames.keys()) - # 29 frames - assert set(frame_indices) == set(list(range(28))) - - -def test_file_url(): - url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerMeltdowns.mp4" - raster_data = VideoData(url=url) - - with pytest.raises(ValueError): - raster_data[0] - - raster_data.load_frames() - raster_data[0] - - frame_indices = list(raster_data.frames.keys()) - # 362 frames - assert set(frame_indices) == set(list(range(361))) - - -def test_ref(): - external_id = "external_id" - uid = "uid" - data = { - x: (np.random.random((32, 32, 3)) * 255).astype(np.uint8) - for x in range(5) - } - metadata = [] - media_attributes = {} - data = VideoData( - frames=data, - external_id=external_id, - uid=uid, - metadata=metadata, - media_attributes=media_attributes, - ) - assert data.external_id == external_id - assert data.uid == uid - assert data.media_attributes == media_attributes - assert data.metadata == metadata From c32bd715c292537d607be6d3eb3eb555cba71cfe Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:51:28 -0500 Subject: [PATCH 34/35] Removed videoData --- .../data/serialization/ndjson/test_export_video_objects.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 1ab678cde..a0cd13e81 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -1,13 +1,13 @@ from labelbox.data.annotation_types import Label, VideoObjectAnnotation from labelbox.data.serialization.ndjson.converter import NDJsonConverter from labelbox.data.annotation_types.geometry import Rectangle, Point -from labelbox.data.annotation_types import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData def video_bbox_label(): return Label( uid="cl1z52xwh00050fhcmfgczqvn", - data=VideoData( + data=GenericDataRowData( uid="cklr9mr4m5iao0rb6cvxu4qbn", url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), From e0eb4c3e8083400c59f6c33e9b2f996469261b1a Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:09:38 -0500 Subject: [PATCH 35/35] Remove union --- .../src/labelbox/data/serialization/ndjson/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py index 86cf0d094..fedf4d91b 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py @@ -232,7 +232,7 @@ def 
from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[GenericDataRowData], + data: GenericDataRowData, message_id: str, confidence: Optional[float] = None, ) -> "NDText": @@ -264,7 +264,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[GenericDataRowData], + data: GenericDataRowData, message_id: str, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None,
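
Across the series, label construction settles on one shape: Label.data is either a GenericDataRowData pointing at a data row by uid or global_key, or a MaskData carrying an in-memory array, replacing the removed TextData, ImageData, VideoData, ConversationData, DicomData, and DocumentData containers. A minimal sketch of the resulting pattern, with placeholder identifiers and import paths taken from the test files above:

    import numpy as np

    from labelbox.data.annotation_types import (
        ClassificationAnnotation,
        Label,
        Text,
    )
    from labelbox.data.annotation_types.data import GenericDataRowData, MaskData

    # Point at an existing data row by uid; global_key=... works the same way.
    text_label = Label(
        data=GenericDataRowData(uid="ckj7z2q0b0000jx6x0q2q7q0d"),
        annotations=[
            ClassificationAnnotation(
                name="free_text", value=Text(answer="sample text")
            )
        ],
    )

    # MaskData still accepts a raw array, but the updated tests also pass a
    # uid or global_key alongside arr so the label can reference a data row.
    mask_label = Label(
        data=MaskData(
            arr=np.zeros((32, 32, 3), dtype=np.uint8), global_key="test"
        ),
        annotations=[],
    )
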