From 0bbd7c290ac4aa55bc5a373b3b63fa77c68dc39f Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 16 Sep 2024 18:05:39 -0700 Subject: [PATCH 01/44] Vb/fix ontology leaks plt 1379 (#1814) --- .../labelbox/schema/bulk_import_request.py | 8 +- .../schema/labeling_service_dashboard.py | 38 ++-- libs/labelbox/tests/conftest.py | 191 +++++++++++++----- .../tests/data/annotation_import/conftest.py | 20 +- .../data/annotation_import/test_model_run.py | 20 +- libs/labelbox/tests/data/export/conftest.py | 11 +- .../tests/data/test_data_row_metadata.py | 15 -- libs/labelbox/tests/integration/conftest.py | 4 +- .../tests/integration/test_feature_schema.py | 18 +- .../unit/test_labeling_service_dashboard.py | 102 +++++----- 10 files changed, 260 insertions(+), 167 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py index 44ac7cd6a..8e11f3261 100644 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ b/libs/labelbox/src/labelbox/schema/bulk_import_request.py @@ -787,9 +787,7 @@ def validate_feature_schemas( # A union with custom construction logic to improve error messages class NDClassification( SpecialUnion, - Type[ # type: ignore - Union[NDText, NDRadio, NDChecklist] - ], + Type[Union[NDText, NDRadio, NDChecklist]], # type: ignore ): ... @@ -979,9 +977,7 @@ class NDTool( class NDAnnotation( SpecialUnion, - Type[ # type: ignore - Union[NDTool, NDClassification] - ], + Type[Union[NDTool, NDClassification]], # type: ignore ): @classmethod def build(cls: Any, data) -> "NDBase": diff --git a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py index 2052897f6..c5e1fa11e 100644 --- a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py +++ b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py @@ -84,7 +84,8 @@ def __init__(self, **kwargs): super().__init__(**kwargs) if not self.client.enable_experimental: raise RuntimeError( - "Please enable experimental in client to use LabelingService") + "Please enable experimental in client to use LabelingService" + ) @property def service_type(self): @@ -97,20 +98,28 @@ def service_type(self): if self.editor_task_type is None: return sentence_case(self.media_type.value) - if (self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation - and self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Offline chat evaluation" - if (self.editor_task_type == EditorTaskType.ModelChatEvaluation and - self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.ModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Live chat evaluation" - if (self.editor_task_type == EditorTaskType.ResponseCreation and - self.media_type == MediaType.Text): + if ( + self.editor_task_type == EditorTaskType.ResponseCreation + and self.media_type == MediaType.Text + ): return "Response creation" - if (self.media_type == MediaType.LLMPromptCreation or - self.media_type == MediaType.LLMPromptResponseCreation): + if ( + self.media_type == MediaType.LLMPromptCreation + or self.media_type == MediaType.LLMPromptResponseCreation + ): return "Prompt response creation" return sentence_case(self.media_type.value) @@ -154,7 +163,8 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) else: template = 
Template( """query SearchProjectsPyApi($$first: Int, $$from: String) { @@ -164,11 +174,13 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) query_str = template.substitute( labeling_dashboard_selections=GRAPHQL_QUERY_SELECTIONS, search_query=build_search_filter(search_query) - if search_query else None, + if search_query + else None, ) params: Dict[str, Union[str, int]] = {} @@ -186,7 +198,7 @@ def convert_to_labeling_service_dashboard(client, data): experimental=True, ) - @model_validator(mode='before') + @model_validator(mode="before") def convert_boost_data(cls, data): if "boostStatus" in data: data["status"] = LabelingServiceStatus(data.pop("boostStatus")) diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py index 446db396b..6d13a8d83 100644 --- a/libs/labelbox/tests/conftest.py +++ b/libs/labelbox/tests/conftest.py @@ -7,7 +7,9 @@ import re import uuid import time +from labelbox.schema.project import Project import requests +from labelbox.schema.ontology import Ontology import pytest from types import SimpleNamespace from typing import Type @@ -23,21 +25,11 @@ from labelbox.schema.queue_mode import QueueMode from labelbox import Client -from labelbox import Dataset, DataRow from labelbox import LabelingFrontend -from labelbox import OntologyBuilder, Tool, Option, Classification, MediaType -from labelbox.orm import query -from labelbox.pagination import PaginatedCollection +from labelbox import OntologyBuilder, Tool, Option, Classification from labelbox.schema.annotation_import import LabelImport -from labelbox.schema.catalog import Catalog from labelbox.schema.enums import AnnotationImportState -from labelbox.schema.invite import Invite -from labelbox.schema.quality_mode import QualityMode -from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.user import User from labelbox.exceptions import LabelboxError -from contextlib import suppress -from labelbox import Client IMG_URL = "https://picsum.photos/200/300.jpg" MASKABLE_IMG_URL = "https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg" @@ -638,17 +630,22 @@ def organization(client): def configured_project_with_label( client, rand_gen, - image_url, - project, dataset, data_row, wait_for_label_processing, + teardown_helpers, ): """Project with a connected dataset, having one datarow + Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra labels One label is already created and yielded when using fixture """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) project._wait_until_data_rows_are_processed( data_row_ids=[data_row.uid], wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS, @@ -666,8 +663,7 @@ def configured_project_with_label( ) yield [project, dataset, data_row, label] - for label in project.labels(): - label.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) def _create_label(project, data_row, ontology, wait_for_label_processing): @@ -736,13 +732,23 @@ def big_dataset(dataset: Dataset): @pytest.fixture def configured_batch_project_with_label( - project, dataset, data_row, wait_for_label_processing + client, + dataset, + data_row, + wait_for_label_processing, + rand_gen, + teardown_helpers, ): """Project with a batch having one datarow Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra 
labels One label is already created and yielded when using fixture """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) data_rows = [dr.uid for dr in list(dataset.data_rows())] project._wait_until_data_rows_are_processed( data_row_ids=data_rows, sleep_interval=3 @@ -757,18 +763,27 @@ def configured_batch_project_with_label( yield [project, dataset, data_row, label] - for label in project.labels(): - label.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture def configured_batch_project_with_multiple_datarows( - project, dataset, data_rows, wait_for_label_processing + client, + dataset, + data_rows, + wait_for_label_processing, + rand_gen, + teardown_helpers, ): """Project with a batch having multiple datarows Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra labels """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) global_keys = [dr.global_key for dr in data_rows] batch_name = f"batch {uuid.uuid4()}" @@ -780,26 +795,7 @@ def configured_batch_project_with_multiple_datarows( yield [project, dataset, data_rows] - for label in project.labels(): - label.delete() - - -@pytest.fixture -def configured_batch_project_for_labeling_service( - project, data_row_and_global_key -): - """Project with a batch having multiple datarows - Project contains an ontology with 1 bbox tool - Additionally includes a create_label method for any needed extra labels - """ - global_keys = [data_row_and_global_key[1]] - - batch_name = f"batch {uuid.uuid4()}" - project.create_batch(batch_name, global_keys=global_keys) - - _setup_ontology(project) - - yield project + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) # NOTE this is nice heuristics, also there is this logic _wait_until_data_rows_are_processed in Project @@ -1062,7 +1058,7 @@ def project_with_empty_ontology(project): @pytest.fixture def configured_project_with_complex_ontology( - client, initial_dataset, rand_gen, image_url + client, initial_dataset, rand_gen, image_url, teardown_helpers ): project = client.create_project( name=rand_gen(str), @@ -1127,7 +1123,7 @@ def configured_project_with_complex_ontology( project.setup(editor, ontology.asdict()) yield [project, data_row] - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture @@ -1147,12 +1143,13 @@ def valid_model_id(): @pytest.fixture def requested_labeling_service( - rand_gen, - live_chat_evaluation_project_with_new_dataset, - chat_evaluation_ontology, - model_config, + rand_gen, client, chat_evaluation_ontology, model_config, teardown_helpers ): - project = live_chat_evaluation_project_with_new_dataset + project_name = f"test-model-evaluation-project-{rand_gen(str)}" + dataset_name = f"test-model-evaluation-dataset-{rand_gen(str)}" + project = client.create_model_evaluation_project( + name=project_name, dataset_name=dataset_name, data_row_count=1 + ) project.connect_ontology(chat_evaluation_ontology) project.upsert_instructions("tests/integration/media/sample_pdf.pdf") @@ -1164,3 +1161,105 @@ def requested_labeling_service( labeling_service.request() yield project, project.get_labeling_service() + + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) + + +class TearDownHelpers: + @staticmethod + def 
teardown_project_labels_ontology_feature_schemas(project: Project):
+        """
+        Call this function to release project, labels, ontology and feature schemas in fixture teardown
+
+        NOTE: exception handling is not required as this is a fixture teardown
+        """
+        ontology = project.ontology()
+        ontology_id = ontology.uid
+        client = project.client
+        classification_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+        ]
+        tool_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["tools"]
+        ]
+
+        feature_schema_ids = (
+            classification_feature_schema_ids + tool_feature_schema_ids
+        )
+        labels = list(project.labels())
+        for label in labels:
+            label.delete()
+
+        project.delete()
+        client.delete_unused_ontology(ontology_id)
+        for feature_schema_id in feature_schema_ids:
+            try:
+                client.delete_unused_feature_schema(feature_schema_id)
+            except LabelboxError as e:
+                print(
+                    f"Failed to delete feature schema {feature_schema_id}: {e}"
+                )
+
+    @staticmethod
+    def teardown_ontology_feature_schemas(ontology: Ontology):
+        """
+        Call this function to release an ontology and its feature schemas in fixture teardown
+
+        NOTE: exception handling is not required as this is a fixture teardown
+        """
+        ontology_id = ontology.uid
+        client = ontology.client
+        classification_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+        ] + [
+            option["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+            for option in feature.get("options", [])
+        ]
+
+        tool_feature_schema_ids = (
+            [
+                feature["featureSchemaId"]
+                for feature in ontology.normalized["tools"]
+            ]
+            + [
+                classification["featureSchemaId"]
+                for tool in ontology.normalized["tools"]
+                for classification in tool.get("classifications", [])
+            ]
+            + [
+                option["featureSchemaId"]
+                for tool in ontology.normalized["tools"]
+                for classification in tool.get("classifications", [])
+                for option in classification.get("options", [])
+            ]
+        )
+
+        feature_schema_ids = (
+            classification_feature_schema_ids + tool_feature_schema_ids
+        )
+
+        client.delete_unused_ontology(ontology_id)
+        for feature_schema_id in feature_schema_ids:
+            try:
+                client.delete_unused_feature_schema(feature_schema_id)
+            except LabelboxError as e:
+                print(
+                    f"Failed to delete feature schema {feature_schema_id}: {e}"
+                )
+
+
+class ModuleTearDownHelpers(TearDownHelpers): ...
+
+
+@pytest.fixture
+def teardown_helpers():
+    return TearDownHelpers()
+
+
+@pytest.fixture(scope="module")
+def module_teardown_helpers():
+    return ModuleTearDownHelpers()
diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py
index 39cede0bb..6543f54bf 100644
--- a/libs/labelbox/tests/data/annotation_import/conftest.py
+++ b/libs/labelbox/tests/data/annotation_import/conftest.py
@@ -1,4 +1,3 @@
-import itertools
 import uuid
 
 from labelbox.schema.model_run import ModelRun
@@ -14,7 +13,6 @@
 from typing import Tuple, Type
 from labelbox.schema.annotation_import import LabelImport, AnnotationImportState
 from pytest import FixtureRequest
 
 """
 The main fixtures of this library are configured_project and configured_project_by_global_key. Both fixtures generate data rows with a parametrize media type. They create the amount of data rows equal to the DATA_ROW_COUNT variable below. 
The data rows are generated with a factory fixture that returns a function that allows you to pass a global key. The ontologies are generated normalized and based on the MediaType given (i.e. only features supported by MediaType are created). This ontology is later used to obtain the correct annotations with the prediction_id_mapping and corresponding inferences. Each data row will have all possible annotations attached supported for the MediaType. @@ -719,7 +717,6 @@ def _create_project( ) project.connect_ontology(ontology) - data_row_data = [] for _ in range(DATA_ROW_COUNT): @@ -752,6 +749,7 @@ def configured_project( normalized_ontology_by_media_type, export_v2_test_helpers, llm_prompt_response_creation_dataset_with_data_row, + teardown_helpers, ): """Configure project for test. Request.param will contain the media type if not present will use Image MediaType. The project will have 10 data rows.""" @@ -789,13 +787,11 @@ def configured_project( yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) if dataset: dataset.delete() - client.delete_unused_ontology(ontology.uid) - @pytest.fixture() def configured_project_by_global_key( @@ -805,6 +801,7 @@ def configured_project_by_global_key( request: FixtureRequest, normalized_ontology_by_media_type, export_v2_test_helpers, + teardown_helpers, ): """Does the same thing as configured project but with global keys focus.""" @@ -841,13 +838,11 @@ def configured_project_by_global_key( yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) if dataset: dataset.delete() - client.delete_unused_ontology(ontology.uid) - @pytest.fixture(scope="module") def module_project( @@ -856,6 +851,7 @@ def module_project( data_row_json_by_media_type, request: FixtureRequest, normalized_ontology_by_media_type, + module_teardown_helpers, ): """Generates a image project that scopes to the test module(file). 
Used to reduce api calls.""" @@ -889,13 +885,13 @@ def module_project( yield project - project.delete() + module_teardown_helpers.teardown_project_labels_ontology_feature_schemas( + project + ) if dataset: dataset.delete() - client.delete_unused_ontology(ontology.uid) - @pytest.fixture def prediction_id_mapping(request, normalized_ontology_by_media_type): diff --git a/libs/labelbox/tests/data/annotation_import/test_model_run.py b/libs/labelbox/tests/data/annotation_import/test_model_run.py index 9eca28429..1174115c5 100644 --- a/libs/labelbox/tests/data/annotation_import/test_model_run.py +++ b/libs/labelbox/tests/data/annotation_import/test_model_run.py @@ -7,13 +7,23 @@ from labelbox import DataSplit, ModelRun -@pytest.mark.order(1) -def test_model_run(client, configured_project_with_label, data_row, rand_gen): +@pytest.fixture +def current_model(client, configured_project_with_label, rand_gen): project, _, _, label = configured_project_with_label - label_id = label.uid ontology = project.ontology() - data = {"name": rand_gen(str), "ontology_id": ontology.uid} - model = client.create_model(data["name"], data["ontology_id"]) + + model = client.create_model(rand_gen(str), ontology.uid) + yield model + + model.delete() + + +def test_model_run( + client, configured_project_with_label, current_model, data_row, rand_gen +): + _, _, _, label = configured_project_with_label + label_id = label.uid + model = current_model name = rand_gen(str) config = {"batch_size": 100, "reruns": None} diff --git a/libs/labelbox/tests/data/export/conftest.py b/libs/labelbox/tests/data/export/conftest.py index 0836c2b9e..0a62f39c8 100644 --- a/libs/labelbox/tests/data/export/conftest.py +++ b/libs/labelbox/tests/data/export/conftest.py @@ -2,7 +2,6 @@ import time import pytest from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.media_type import MediaType from labelbox.schema.labeling_frontend import LabelingFrontend from labelbox.schema.annotation_import import LabelImport, AnnotationImportState @@ -242,7 +241,7 @@ def polygon_inference(prediction_id_mapping): @pytest.fixture def configured_project_with_ontology( - client, initial_dataset, ontology, rand_gen, image_url + client, initial_dataset, ontology, rand_gen, image_url, teardown_helpers ): dataset = initial_dataset project = client.create_project( @@ -264,11 +263,13 @@ def configured_project_with_ontology( ) project.data_row_ids = data_row_ids yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture -def configured_project_without_data_rows(client, ontology, rand_gen): +def configured_project_without_data_rows( + client, ontology, rand_gen, teardown_helpers +): project = client.create_project( name=rand_gen(str), description=rand_gen(str), @@ -279,7 +280,7 @@ def configured_project_without_data_rows(client, ontology, rand_gen): )[0] project.setup(editor, ontology) yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture diff --git a/libs/labelbox/tests/data/test_data_row_metadata.py b/libs/labelbox/tests/data/test_data_row_metadata.py index 9a3690776..891cab9be 100644 --- a/libs/labelbox/tests/data/test_data_row_metadata.py +++ b/libs/labelbox/tests/data/test_data_row_metadata.py @@ -92,21 +92,6 @@ def make_named_metadata(dr_id) -> DataRowMetadata: return metadata -@pytest.mark.skip(reason="broken export v1 api, to be retired soon") -def test_export_empty_metadata( - client, 
configured_project_with_label, wait_for_data_row_processing -): - project, _, data_row, _ = configured_project_with_label - data_row = wait_for_data_row_processing(client, data_row) - - export_task = project.export(params={"metadata_fields": True}) - export_task.wait_till_done() - stream = export_task.get_buffered_stream() - data_row = [data_row.json for data_row in stream][0] - - assert data_row["metadata_fields"] == [] - - def test_bulk_export_datarow_metadata(data_row, mdo: DataRowMetadataOntology): metadata = make_metadata(data_row.uid) mdo.bulk_upsert([metadata]) diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index d37287fe8..c917a6164 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -113,7 +113,7 @@ def configured_project( @pytest.fixture def configured_project_with_complex_ontology( - client, initial_dataset, rand_gen, image_url + client, initial_dataset, rand_gen, image_url, teardown_helpers ): project = client.create_project( name=rand_gen(str), @@ -178,7 +178,7 @@ def configured_project_with_complex_ontology( project.setup(editor, ontology.asdict()) yield [project, data_row] - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture diff --git a/libs/labelbox/tests/integration/test_feature_schema.py b/libs/labelbox/tests/integration/test_feature_schema.py index 1dc940f08..46ec8c067 100644 --- a/libs/labelbox/tests/integration/test_feature_schema.py +++ b/libs/labelbox/tests/integration/test_feature_schema.py @@ -58,9 +58,8 @@ def test_throws_an_error_if_feature_schema_to_delete_doesnt_exist(client): client.delete_unused_feature_schema("doesntexist") -def test_updates_a_feature_schema_title(client): - tool = client.upsert_feature_schema(point.asdict()) - feature_schema_id = tool.normalized["featureSchemaId"] +def test_updates_a_feature_schema_title(client, feature_schema): + feature_schema_id = feature_schema.normalized["featureSchemaId"] new_title = "new title" updated_feature_schema = client.update_feature_schema_title( feature_schema_id, new_title @@ -68,20 +67,16 @@ def test_updates_a_feature_schema_title(client): assert updated_feature_schema.normalized["name"] == new_title - client.delete_unused_feature_schema(feature_schema_id) - def test_throws_an_error_when_updating_a_feature_schema_with_empty_title( - client, + client, feature_schema ): - tool = client.upsert_feature_schema(point.asdict()) + tool = feature_schema feature_schema_id = tool.normalized["featureSchemaId"] with pytest.raises(Exception): client.update_feature_schema_title(feature_schema_id, "") - client.delete_unused_feature_schema(feature_schema_id) - def test_throws_an_error_when_updating_not_existing_feature_schema(client): with pytest.raises(Exception): @@ -107,8 +102,8 @@ def test_updates_a_feature_schema(client, feature_schema): assert updated_feature_schema.normalized["name"] == "new name" -def test_does_not_include_used_feature_schema(client): - tool = client.upsert_feature_schema(point.asdict()) +def test_does_not_include_used_feature_schema(client, feature_schema): + tool = feature_schema feature_schema_id = tool.normalized["featureSchemaId"] ontology = client.create_ontology_from_feature_schemas( name="ontology name", @@ -120,4 +115,3 @@ def test_does_not_include_used_feature_schema(client): assert feature_schema_id not in unused_feature_schemas client.delete_unused_ontology(ontology.uid) - 
client.delete_unused_feature_schema(feature_schema_id) diff --git a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py index 8ecdef2f1..061efbadf 100644 --- a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py +++ b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py @@ -5,23 +5,23 @@ def test_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count is None @@ -29,23 +29,23 @@ def test_no_tasks_remaining_count(): def test_tasks_remaining_count_exists(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 1, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 1, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 1 @@ -53,23 +53,23 @@ def test_tasks_remaining_count_exists(): def test_tasks_total_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 1, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 1, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + 
"boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 1, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 1, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 0 From 51ecfeab2efa15402d949b5799e21f77ea26ee95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20J=C3=B3=C5=BAwiak?= Date: Mon, 9 Sep 2024 15:24:35 +0200 Subject: [PATCH 02/44] [PTDT-2553] Added integration tests for MMC MAL/GT imports --- .../tests/data/annotation_import/conftest.py | 495 +++++++++++++++++- .../test_generic_data_types.py | 6 + 2 files changed, 500 insertions(+), 1 deletion(-) diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py index 6543f54bf..2342a759a 100644 --- a/libs/labelbox/tests/data/annotation_import/conftest.py +++ b/libs/labelbox/tests/data/annotation_import/conftest.py @@ -1,4 +1,5 @@ import uuid +from typing import Union from labelbox.schema.model_run import ModelRun from labelbox.schema.ontology import Ontology @@ -152,6 +153,22 @@ def llm_human_preference_data_row(global_key): return llm_human_preference_data_row +@pytest.fixture(scope="module") +def mmc_data_row_url(): + return "https://storage.googleapis.com/labelbox-datasets/conversational_model_evaluation_sample/offline-model-chat-evaluation.json" + + +@pytest.fixture(scope="module", autouse=True) +def offline_model_evaluation_data_row_factory(mmc_data_row_url: str): + def offline_model_evaluation_data_row(global_key: str): + return { + "row_data": mmc_data_row_url, + "global_key": global_key, + } + + return offline_model_evaluation_data_row + + @pytest.fixture(scope="module", autouse=True) def data_row_json_by_media_type( audio_data_row_factory, @@ -163,6 +180,7 @@ def data_row_json_by_media_type( document_data_row_factory, text_data_row_factory, video_data_row_factory, + offline_model_evaluation_data_row_factory, ): return { MediaType.Audio: audio_data_row_factory, @@ -174,6 +192,7 @@ def data_row_json_by_media_type( MediaType.Document: document_data_row_factory, MediaType.Text: text_data_row_factory, MediaType.Video: video_data_row_factory, + OntologyKind.ModelEvaluation: offline_model_evaluation_data_row_factory, } @@ -345,6 +364,26 @@ def normalized_ontology_by_media_type(): ], } + radio_index = { + "required": False, + "instructions": "radio_index", + "name": "radio_index", + "type": "radio", + "scope": "index", + "options": [ + { + "label": "first_radio_answer", + "value": "first_radio_answer", + "options": [], + }, + { + "label": "second_radio_answer", + "value": "second_radio_answer", + "options": [], + }, + ], + } + prompt_text = { "instructions": "prompt-text", "name": "prompt-text", @@ -403,6 +442,27 @@ def normalized_ontology_by_media_type(): "type": "response-text", } + message_single_selection_task = { + "required": False, + "name": "message-single-selection", + "tool": "message-single-selection", + "classifications": [], + } + + message_multi_selection_task = { + "required": False, + "name": "message-multi-selection", + "tool": "message-multi-selection", + "classifications": [], + } + + message_ranking_task = { + "required": False, + "name": "message-ranking", + "tool": "message-ranking", + "classifications": [], + } + return { MediaType.Image: { "tools": [ @@ -516,6 +576,21 @@ def 
normalized_ontology_by_media_type(): response_checklist, ], }, + OntologyKind.ModelEvaluation: { + "tools": [ + message_single_selection_task, + message_multi_selection_task, + message_ranking_task, + ], + "classifications": [ + radio, + checklist, + free_form_text, + radio_index, + checklist_index, + free_form_text_index, + ], + }, "all": { "tools": [ bbox_tool, @@ -695,6 +770,45 @@ def _create_prompt_response_project( return prompt_response_project, ontology +def _create_offline_mmc_project( + client: Client, rand_gen, data_row_json, normalized_ontology +) -> Tuple[Project, Ontology, Dataset]: + dataset = client.create_dataset(name=rand_gen(str)) + + project = client.create_offline_model_evaluation_project( + name=f"offline-mmc-{rand_gen(str)}", + ) + + ontology = client.create_ontology( + name=f"offline-mmc-{rand_gen(str)}", + normalized=normalized_ontology, + media_type=MediaType.Conversational, + ontology_kind=OntologyKind.ModelEvaluation, + ) + + project.connect_ontology(ontology) + + data_row_data = [ + data_row_json(rand_gen(str)) for _ in range(DATA_ROW_COUNT) + ] + + task = dataset.create_data_rows(data_row_data) + task.wait_till_done() + global_keys = [row["global_key"] for row in task.result] + data_row_ids = [row["id"] for row in task.result] + + project.create_batch( + rand_gen(str), + data_row_ids, # sample of data row objects + 5, # priority between 1(Highest) - 5(lowest) + ) + project.data_row_ids = data_row_ids + project.data_row_data = data_row_data + project.global_keys = global_keys + + return project, ontology, dataset + + def _create_project( client: Client, rand_gen, @@ -753,7 +867,10 @@ def configured_project( ): """Configure project for test. Request.param will contain the media type if not present will use Image MediaType. 
The project will have 10 data rows.""" - media_type = getattr(request, "param", MediaType.Image) + media_type: Union[MediaType, OntologyKind] = getattr( + request, "param", MediaType.Image + ) + dataset = None if ( @@ -776,6 +893,13 @@ def configured_project( media_type, normalized_ontology_by_media_type, ) + elif media_type == OntologyKind.ModelEvaluation: + project, ontology, dataset = _create_offline_mmc_project( + client, + rand_gen, + data_row_json_by_media_type[media_type], + normalized_ontology_by_media_type[media_type], + ) else: project, ontology, dataset = _create_project( client, @@ -827,6 +951,13 @@ def configured_project_by_global_key( media_type, normalized_ontology_by_media_type, ) + elif media_type == OntologyKind.ModelEvaluation: + project, ontology, dataset = _create_offline_mmc_project( + client, + rand_gen, + data_row_json_by_media_type[media_type], + normalized_ontology_by_media_type[media_type], + ) else: project, ontology, dataset = _create_project( client, @@ -988,6 +1119,31 @@ def prediction_id_mapping(request, normalized_ontology_by_media_type): return base_annotations +@pytest.fixture +def mmc_example_data_row_message_ids(mmc_data_row_url: str): + data_row_content = requests.get(mmc_data_row_url).json() + + human_id = next( + actor_id + for actor_id, actor_metadata in data_row_content["actors"].items() + if actor_metadata["role"] == "human" + ) + + return { + message_id: [ + { + "id": child_msg_id, + "model_config_name": data_row_content["actors"][ + data_row_content["messages"][child_msg_id]["actorId"] + ]["metadata"]["modelConfigName"], + } + for child_msg_id in message_metadata["childMessageIds"] + ] + for message_id, message_metadata in data_row_content["messages"].items() + if message_metadata["actorId"] == human_id + } + + # Each inference represents a feature type that adds to the base annotation created with prediction_id_mapping @pytest.fixture def polygon_inference(prediction_id_mapping): @@ -1303,6 +1459,31 @@ def checklist_inference_index(prediction_id_mapping): return checklists +@pytest.fixture +def checklist_inference_index_mmc( + prediction_id_mapping, mmc_example_data_row_message_ids +): + checklists = [] + for feature in prediction_id_mapping: + if "checklist_index" not in feature: + return None + checklist = feature["checklist_index"].copy() + checklist.update( + { + "answers": [ + {"name": "first_checklist_answer"}, + {"name": "second_checklist_answer"}, + ], + "messageId": next( + iter(mmc_example_data_row_message_ids.keys()) + ), + } + ) + del checklist["tool"] + checklists.append(checklist) + return checklists + + @pytest.fixture def prompt_text_inference(prediction_id_mapping): prompt_texts = [] @@ -1333,6 +1514,45 @@ def radio_response_inference(prediction_id_mapping): return response_radios +@pytest.fixture +def radio_inference(prediction_id_mapping): + radios = [] + for feature in prediction_id_mapping: + if "radio" not in feature: + continue + radio = feature["radio"].copy() + radio.update( + { + "answer": {"name": "first_radio_answer"}, + } + ) + del radio["tool"] + radios.append(radio) + return radios + + +@pytest.fixture +def radio_inference_index_mmc( + prediction_id_mapping, mmc_example_data_row_message_ids +): + radios = [] + for feature in prediction_id_mapping: + if "radio_index" not in feature: + continue + radio = feature["radio_index"].copy() + radio.update( + { + "answer": {"name": "first_radio_answer"}, + "messageId": next( + iter(mmc_example_data_row_message_ids.keys()) + ), + } + ) + del radio["tool"] + 
radios.append(radio) + return radios + + @pytest.fixture def checklist_response_inference(prediction_id_mapping): response_checklists = [] @@ -1402,6 +1622,28 @@ def text_inference_index(prediction_id_mapping): return texts +@pytest.fixture +def text_inference_index_mmc( + prediction_id_mapping, mmc_example_data_row_message_ids +): + texts = [] + for feature in prediction_id_mapping: + if "text_index" not in feature: + continue + text = feature["text_index"].copy() + text.update( + { + "answer": "free form text...", + "messageId": next( + iter(mmc_example_data_row_message_ids.keys()) + ), + } + ) + del text["tool"] + texts.append(text) + return texts + + @pytest.fixture def video_checklist_inference(prediction_id_mapping): checklists = [] @@ -1437,6 +1679,118 @@ def video_checklist_inference(prediction_id_mapping): return checklists +@pytest.fixture +def message_single_selection_inference( + prediction_id_mapping, mmc_example_data_row_message_ids +): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + res = [] + for feature in prediction_id_mapping: + if "message-single-selection" not in feature: + continue + selection = feature["message-single-selection"].copy() + selection.update( + { + "messageEvaluationTask": { + "format": "message-single-selection", + "data": { + "messageId": some_child_ids[0]["id"], + "parentMessageId": some_parent_id, + "modelConfigName": some_child_ids[0][ + "model_config_name" + ], + }, + } + } + ) + del selection["tool"] + res.append(selection) + + return res + + +@pytest.fixture +def message_multi_selection_inference( + prediction_id_mapping, mmc_example_data_row_message_ids +): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + res = [] + for feature in prediction_id_mapping: + if "message-multi-selection" not in feature: + continue + selection = feature["message-multi-selection"].copy() + selection.update( + { + "messageEvaluationTask": { + "format": "message-multi-selection", + "data": { + "parentMessageId": some_parent_id, + "selectedMessages": [ + { + "messageId": child_id["id"], + "modelConfigName": child_id[ + "model_config_name" + ], + } + for child_id in some_child_ids + ], + }, + } + } + ) + del selection["tool"] + res.append(selection) + + return res + + +@pytest.fixture +def message_ranking_inference( + prediction_id_mapping, mmc_example_data_row_message_ids +): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + res = [] + for feature in prediction_id_mapping: + if "message-ranking" not in feature: + continue + selection = feature["message-ranking"].copy() + selection.update( + { + "messageEvaluationTask": { + "format": "message-ranking", + "data": { + "parentMessageId": some_parent_id, + "rankedMessages": [ + { + "messageId": child_id["id"], + "modelConfigName": child_id[ + "model_config_name" + ], + "order": idx, + } + for idx, child_id in enumerate( + some_child_ids, start=1 + ) + ], + }, + } + } + ) + del selection["tool"] + res.append(selection) + + return res + + @pytest.fixture def annotations_by_media_type( polygon_inference, @@ -1456,6 +1810,13 @@ def annotations_by_media_type( checklist_response_inference, radio_response_inference, text_response_inference, + message_single_selection_inference, + message_multi_selection_inference, + message_ranking_inference, + checklist_inference_index_mmc, + radio_inference, + radio_inference_index_mmc, + text_inference_index_mmc, ): return { MediaType.Audio: 
[checklist_inference, text_inference], @@ -1493,6 +1854,17 @@ def annotations_by_media_type( checklist_response_inference, radio_response_inference, ], + OntologyKind.ModelEvaluation: [ + message_single_selection_inference, + message_multi_selection_inference, + message_ranking_inference, + radio_inference, + checklist_inference, + text_inference, + radio_inference_index_mmc, + checklist_inference_index_mmc, + text_inference_index_mmc, + ], } @@ -2162,6 +2534,125 @@ def expected_export_v2_llm_response_creation(): return expected_annotations +@pytest.fixture +def expected_exports_v2_mmc(mmc_example_data_row_message_ids): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + return { + "objects": [ + { + "name": "message-single-selection", + "annotation_kind": "MessageSingleSelection", + "classifications": [], + "selected_message": { + "message_id": some_child_ids[0]["id"], + "model_config_name": some_child_ids[0]["model_config_name"], + "parent_message_id": some_parent_id, + }, + }, + { + "name": "message-multi-selection", + "annotation_kind": "MessageMultiSelection", + "classifications": [], + "selected_messages": { + "messages": [ + { + "message_id": child_id["id"], + "model_config_name": child_id["model_config_name"], + } + for child_id in some_child_ids + ], + "parent_message_id": some_parent_id, + }, + }, + { + "name": "message-ranking", + "annotation_kind": "MessageRanking", + "classifications": [], + "ranked_messages": { + "ranked_messages": [ + { + "message_id": child_id["id"], + "model_config_name": child_id["model_config_name"], + "order": idx, + } + for idx, child_id in enumerate(some_child_ids, start=1) + ], + "parent_message_id": some_parent_id, + }, + }, + ], + "classifications": [ + { + "name": "radio", + "value": "radio", + "radio_answer": { + "name": "first_radio_answer", + "value": "first_radio_answer", + "classifications": [], + }, + }, + { + "name": "checklist", + "value": "checklist", + "checklist_answers": [ + { + "name": "first_checklist_answer", + "value": "first_checklist_answer", + "classifications": [], + }, + { + "name": "second_checklist_answer", + "value": "second_checklist_answer", + "classifications": [], + }, + ], + }, + { + "name": "text", + "value": "text", + "text_answer": {"content": "free form text..."}, + }, + { + "name": "radio_index", + "value": "radio_index", + "message_id": some_parent_id, + "conversational_radio_answer": { + "name": "first_radio_answer", + "value": "first_radio_answer", + "classifications": [], + }, + }, + { + "name": "checklist_index", + "value": "checklist_index", + "message_id": some_parent_id, + "conversational_checklist_answers": [ + { + "name": "first_checklist_answer", + "value": "first_checklist_answer", + "classifications": [], + }, + { + "name": "second_checklist_answer", + "value": "second_checklist_answer", + "classifications": [], + }, + ], + }, + { + "name": "text_index", + "value": "text_index", + "message_id": some_parent_id, + "conversational_text_answer": {"content": "free form text..."}, + }, + ], + "relationships": [], + } + + @pytest.fixture def exports_v2_by_media_type( expected_export_v2_image, @@ -2175,6 +2666,7 @@ def exports_v2_by_media_type( expected_export_v2_llm_prompt_response_creation, expected_export_v2_llm_prompt_creation, expected_export_v2_llm_response_creation, + expected_exports_v2_mmc, ): return { MediaType.Image: expected_export_v2_image, @@ -2188,6 +2680,7 @@ def exports_v2_by_media_type( MediaType.LLMPromptResponseCreation: 
expected_export_v2_llm_prompt_response_creation,
         MediaType.LLMPromptCreation: expected_export_v2_llm_prompt_creation,
         OntologyKind.ResponseCreation: expected_export_v2_llm_response_creation,
+        OntologyKind.ModelEvaluation: expected_exports_v2_mmc,
     }
 
 
diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
index f8f0c449a..9de67bd4e 100644
--- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
+++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
@@ -41,6 +41,7 @@ def validate_iso_format(date_string: str):
         (MediaType.LLMPromptResponseCreation, GenericDataRowData),
         (MediaType.LLMPromptCreation, GenericDataRowData),
         (OntologyKind.ResponseCreation, GenericDataRowData),
+        (OntologyKind.ModelEvaluation, GenericDataRowData),
     ],
 )
 def test_generic_data_row_type_by_data_row_id(
@@ -76,6 +77,7 @@ def test_generic_data_row_type_by_data_row_id(
         # (MediaType.LLMPromptResponseCreation, GenericDataRowData),
         # (MediaType.LLMPromptCreation, GenericDataRowData),
         (OntologyKind.ResponseCreation, GenericDataRowData),
+        (OntologyKind.ModelEvaluation, GenericDataRowData),
     ],
 )
 def test_generic_data_row_type_by_global_key(
@@ -115,6 +117,7 @@ def test_generic_data_row_type_by_global_key(
         ),
         (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation),
         (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
     ],
     indirect=["configured_project"],
 )
@@ -191,6 +194,7 @@ def test_import_media_types(
         (MediaType.Document, MediaType.Document),
         (MediaType.Dicom, MediaType.Dicom),
         (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
     ],
     indirect=["configured_project_by_global_key"],
 )
@@ -275,6 +279,7 @@ def test_import_media_types_by_global_key(
         ),
         (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation),
         (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
     ],
     indirect=["configured_project"],
 )
@@ -309,6 +314,7 @@ def test_import_mal_annotations(
         (MediaType.Document, MediaType.Document),
         (MediaType.Dicom, MediaType.Dicom),
         (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
     ],
     indirect=["configured_project_by_global_key"],
 )
From 24e07661a77f60190a31e0ee6077e04b65a373fe Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Tue, 17 Sep 2024 10:57:13 -0700
Subject: [PATCH 03/44] SDK release v.5.0.0 prep (#1823)

---
 docs/conf.py                           |  2 +-
 libs/labelbox/CHANGELOG.md             | 10 ++++++++++
 libs/labelbox/pyproject.toml           |  2 +-
 libs/labelbox/src/labelbox/__init__.py |  2 +-
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index b9870b87a..a67a44a24 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -16,7 +16,7 @@
 project = 'Python SDK reference'
 copyright = '2024, Labelbox'
 author = 'Labelbox'
-release = '4.0.0'
+release = '5.0.0'
 
 # -- General configuration ---------------------------------------------------
 
diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md
index ae97086c6..b2d41b56d 100644
--- a/libs/labelbox/CHANGELOG.md
+++ b/libs/labelbox/CHANGELOG.md
@@ -1,4 +1,14 @@
 # Changelog
+# Version 5.0.0 (2024-09-16)
+## Updated
+* Set tasks_remaining_count to None in LabelingServiceDashboard if labeling has not started 
([#1817](https://github.com/Labelbox/labelbox-python/pull/1817)) +* Improve error messaging when creating LLM project with invalid dataset id parameter([#1799](https://github.com/Labelbox/labelbox-python/pull/1799)) +## Removed +* BREAKING CHANGE SDK methods for exports v1([#1800](https://github.com/Labelbox/labelbox-python/pull/1800)) +* BREAKING CHANGE Unused labelbox_v1 serialization package([#1803](https://github.com/Labelbox/labelbox-python/pull/1803)) +## Fixed +* Cuid dependencies that cause a crash if numpy is not installed ([#1807](https://github.com/Labelbox/labelbox-python/pull/1807)) + # Version 4.0.0 (2024-09-10) ## Added * BREAKING CHANGE for pydantic V1 users: Converted SDK to use pydantic V2([#1738](https://github.com/Labelbox/labelbox-python/pull/1738)) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index 58ce3410a..f4c24af59 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "labelbox" -version = "4.0.0" +version = "5.0.0" description = "Labelbox Python API" authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }] dependencies = [ diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 633e8f4c2..5b5ac1f67 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -1,6 +1,6 @@ name = "labelbox" -__version__ = "4.0.0" +__version__ = "5.0.0" from labelbox.client import Client from labelbox.schema.project import Project From 2faf9a10c068621e3a58a690b1dbbddbce0c0f25 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 18 Sep 2024 09:01:28 -0700 Subject: [PATCH 04/44] Vb/merge 5.0.0 (#1826) Co-authored-by: Gabe <33893811+Gabefire@users.noreply.github.com> --- libs/labelbox/src/labelbox/__init__.py | 1 - libs/labelbox/src/labelbox/orm/model.py | 1 - libs/labelbox/src/labelbox/schema/__init__.py | 21 +- .../labelbox/schema/bulk_import_request.py | 1004 ----------------- libs/labelbox/src/labelbox/schema/enums.py | 25 - libs/labelbox/src/labelbox/schema/project.py | 120 +- .../test_bulk_import_request.py | 258 ----- .../test_ndjson_validation.py | 53 +- .../classification_import_global_key.json | 54 - ...conversation_entity_import_global_key.json | 25 - .../data/assets/ndjson/image_import.json | 779 +------------ .../ndjson/image_import_global_key.json | 823 -------------- .../assets/ndjson/image_import_name_only.json | 810 +------------ .../ndjson/metric_import_global_key.json | 10 - .../assets/ndjson/pdf_import_global_key.json | 155 --- .../ndjson/polyline_import_global_key.json | 36 - .../ndjson/text_entity_import_global_key.json | 26 - .../ndjson/video_import_global_key.json | 166 --- .../serialization/ndjson/test_checklist.py | 26 - .../ndjson/test_classification.py | 108 +- .../serialization/ndjson/test_conversation.py | 71 +- .../serialization/ndjson/test_data_gen.py | 80 +- .../data/serialization/ndjson/test_dicom.py | 26 - .../serialization/ndjson/test_document.py | 294 ++++- .../ndjson/test_export_video_objects.py | 32 +- .../serialization/ndjson/test_free_text.py | 26 - .../serialization/ndjson/test_global_key.py | 125 +- .../data/serialization/ndjson/test_image.py | 203 +++- .../data/serialization/ndjson/test_metric.py | 170 ++- .../data/serialization/ndjson/test_mmc.py | 125 +- .../ndjson/test_ndlabel_subclass_matching.py | 19 - .../data/serialization/ndjson/test_nested.py | 236 +++- .../serialization/ndjson/test_polyline.py | 82 +- .../data/serialization/ndjson/test_radio.py | 16 - 
.../serialization/ndjson/test_rectangle.py | 43 +- .../serialization/ndjson/test_relationship.py | 151 ++- .../data/serialization/ndjson/test_text.py | 10 - .../serialization/ndjson/test_text_entity.py | 69 +- .../data/serialization/ndjson/test_video.py | 868 +++++++++++++- 39 files changed, 2380 insertions(+), 4767 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py delete mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py delete mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json delete mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 5b5ac1f67..f9b82b422 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -6,7 +6,6 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig -from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( MALPredictionImport, MEAPredictionImport, diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py index 84dcac774..1f3ee1d86 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -386,7 +386,6 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] - BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index 03327e0d1..e57c04a29 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,29 +1,28 @@ -import labelbox.schema.asset_attachment -import labelbox.schema.bulk_import_request import labelbox.schema.annotation_import +import labelbox.schema.asset_attachment +import labelbox.schema.batch import labelbox.schema.benchmark +import labelbox.schema.catalog import labelbox.schema.data_row +import labelbox.schema.data_row_metadata import labelbox.schema.dataset +import labelbox.schema.iam_integration +import labelbox.schema.identifiable +import labelbox.schema.identifiables import labelbox.schema.invite import labelbox.schema.label import labelbox.schema.labeling_frontend import labelbox.schema.labeling_service +import labelbox.schema.media_type import labelbox.schema.model import labelbox.schema.model_run import labelbox.schema.ontology +import labelbox.schema.ontology_kind import labelbox.schema.organization import labelbox.schema.project 
+import labelbox.schema.project_overview import labelbox.schema.review import labelbox.schema.role import labelbox.schema.task import labelbox.schema.user import labelbox.schema.webhook -import labelbox.schema.data_row_metadata -import labelbox.schema.batch -import labelbox.schema.iam_integration -import labelbox.schema.media_type -import labelbox.schema.identifiables -import labelbox.schema.identifiable -import labelbox.schema.catalog -import labelbox.schema.ontology_kind -import labelbox.schema.project_overview diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py deleted file mode 100644 index 8e11f3261..000000000 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ /dev/null @@ -1,1004 +0,0 @@ -import json -import time -from uuid import UUID, uuid4 -import functools - -import logging -from pathlib import Path -from google.api_core import retry -from labelbox import parser -import requests -from pydantic import ( - ValidationError, - BaseModel, - Field, - field_validator, - model_validator, - ConfigDict, - StringConstraints, -) -from typing_extensions import Literal, Annotated -from typing import ( - Any, - List, - Optional, - BinaryIO, - Dict, - Iterable, - Tuple, - Union, - Type, - Set, - TYPE_CHECKING, -) - -from labelbox import exceptions as lb_exceptions -from labelbox import utils -from labelbox.orm import query -from labelbox.orm.db_object import DbObject -from labelbox.orm.model import Relationship -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.serialization import serialize_labels -from labelbox.orm.model import Field as lb_Field - -if TYPE_CHECKING: - from labelbox import Project - from labelbox.types import Label - -NDJSON_MIME_TYPE = "application/x-ndjson" -logger = logging.getLogger(__name__) - -# TODO: Deprecate this library in place of labelimport and malprediction import library. - - -def _determinants(parent_cls: Any) -> List[str]: - return [ - k - for k, v in parent_cls.model_fields.items() - if v.json_schema_extra and "determinant" in v.json_schema_extra - ] - - -def _make_file_name(project_id: str, name: str) -> str: - return f"{project_id}__{name}.ndjson" - - -# TODO(gszpak): move it to client.py -def _make_request_data( - project_id: str, name: str, content_length: int, file_name: str -) -> dict: - query_str = """mutation createBulkImportRequestFromFilePyApi( - $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) 
{ - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - filePayload: { - file: $file, - contentLength: $contentLength - } - }) { - %s - } - } - """ % query.results_query_part(BulkImportRequest) - variables = { - "projectId": project_id, - "name": name, - "file": None, - "contentLength": content_length, - } - operations = json.dumps({"variables": variables, "query": query_str}) - - return { - "operations": operations, - "map": (None, json.dumps({file_name: ["variables.file"]})), - } - - -def _send_create_file_command( - client, - request_data: dict, - file_name: str, - file_data: Tuple[str, Union[bytes, BinaryIO], str], -) -> dict: - response = client.execute(data=request_data, files={file_name: file_data}) - - if not response.get("createBulkImportRequest", None): - raise lb_exceptions.LabelboxError( - "Failed to create BulkImportRequest, message: %s" - % response.get("errors", None) - or response.get("error", None) - ) - - return response - - -class BulkImportRequest(DbObject): - """Represents the import job when importing annotations. - - Attributes: - name (str) - state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) - input_file_url (str): URL to your web-hosted NDJSON file - error_file_url (str): NDJSON that contains error messages for failed annotations - status_file_url (str): NDJSON that contains status for each annotation - created_at (datetime): UTC timestamp for date BulkImportRequest was created - - project (Relationship): `ToOne` relationship to Project - created_by (Relationship): `ToOne` relationship to User - """ - - name = lb_Field.String("name") - state = lb_Field.Enum(BulkImportRequestState, "state") - input_file_url = lb_Field.String("input_file_url") - error_file_url = lb_Field.String("error_file_url") - status_file_url = lb_Field.String("status_file_url") - created_at = lb_Field.DateTime("created_at") - - project = Relationship.ToOne("Project") - created_by = Relationship.ToOne("User", False, "created_by") - - @property - def inputs(self) -> List[Dict[str, Any]]: - """ - Inputs for each individual annotation uploaded. - This should match the ndjson annotations that you have uploaded. - - Returns: - Uploaded ndjson. - - * This information will expire after 24 hours. - """ - return self._fetch_remote_ndjson(self.input_file_url) - - @property - def errors(self) -> List[Dict[str, Any]]: - """ - Errors for each individual annotation uploaded. This is a subset of statuses - - Returns: - List of dicts containing error messages. Empty list means there were no errors - See `BulkImportRequest.statuses` for more details. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.error_file_url) - - @property - def statuses(self) -> List[Dict[str, Any]]: - """ - Status for each individual annotation uploaded. - - Returns: - A status for each annotation if the upload is done running. - See below table for more details - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - Field - - Description - * - uuid - - Specifies the annotation for the status row. - * - dataRow - - JSON object containing the Labelbox data row ID for the annotation. - * - status - - Indicates SUCCESS or FAILURE. - * - errors - - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. - - * This information will expire after 24 hours. 
- """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.status_file_url) - - @functools.lru_cache() - def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: - """ - Fetches the remote ndjson file and caches the results. - - Args: - url (str): Can be any url pointing to an ndjson file. - Returns: - ndjson as a list of dicts. - """ - response = requests.get(url) - response.raise_for_status() - return parser.loads(response.text) - - def refresh(self) -> None: - """Synchronizes values of all fields with the database.""" - query_str, params = query.get_single(BulkImportRequest, self.uid) - res = self.client.execute(query_str, params) - res = res[utils.camel_case(BulkImportRequest.type_name())] - self._set_field_values(res) - - def wait_till_done(self, sleep_time_seconds: int = 5) -> None: - self.wait_until_done(sleep_time_seconds) - - def wait_until_done(self, sleep_time_seconds: int = 5) -> None: - """Blocks import job until certain conditions are met. - - Blocks until the BulkImportRequest.state changes either to - `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, - periodically refreshing object's state. - - Args: - sleep_time_seconds (str): a time to block between subsequent API calls - """ - while self.state == BulkImportRequestState.RUNNING: - logger.info(f"Sleeping for {sleep_time_seconds} seconds...") - time.sleep(sleep_time_seconds) - self.__exponential_backoff_refresh() - - @retry.Retry( - predicate=retry.if_exception_type( - lb_exceptions.ApiLimitError, - lb_exceptions.TimeoutError, - lb_exceptions.NetworkError, - ) - ) - def __exponential_backoff_refresh(self) -> None: - self.refresh() - - @classmethod - def from_name( - cls, client, project_id: str, name: str - ) -> "BulkImportRequest": - """Fetches existing BulkImportRequest. - - Args: - client (Client): a Labelbox client - project_id (str): BulkImportRequest's project id - name (str): name of BulkImportRequest - Returns: - BulkImportRequest object - - """ - query_str = """query getBulkImportRequestPyApi( - $projectId: ID!, $name: String!) { - bulkImportRequest(where: { - projectId: $projectId, - name: $name - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name} - response = client.execute(query_str, params=params) - return cls(client, response["bulkImportRequest"]) - - @classmethod - def create_from_url( - cls, client, project_id: str, name: str, url: str, validate=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a publicly accessible URL - to an ndjson file with predictions. - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - url (str): publicly accessible URL pointing to ndjson file containing predictions - validate (bool): a flag indicating if there should be a validation - if `url` is valid ndjson - Returns: - BulkImportRequest object - """ - if validate: - logger.warn( - "Validation is turned on. The file will be downloaded locally and processed before uploading." - ) - res = requests.get(url) - data = parser.loads(res.text) - _validate_ndjson(data, client.get_project(project_id)) - - query_str = """mutation createBulkImportRequestPyApi( - $projectId: ID!, $name: String!, $fileUrl: String!) 
{ - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - fileUrl: $fileUrl - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name, "fileUrl": url} - bulk_import_request_response = client.execute(query_str, params=params) - return cls( - client, bulk_import_request_response["createBulkImportRequest"] - ) - - @classmethod - def create_from_objects( - cls, - client, - project_id: str, - name: str, - predictions: Union[Iterable[Dict], Iterable["Label"]], - validate=True, - ) -> "BulkImportRequest": - """ - Creates a `BulkImportRequest` from an iterable of dictionaries. - - Conforms to JSON predictions format, e.g.: - ``{ - "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", - "schemaId": "ckappz7d700gn0zbocmqkwd9i", - "dataRow": { - "id": "ck1s02fqxm8fi0757f0e6qtdc" - }, - "bbox": { - "top": 48, - "left": 58, - "height": 865, - "width": 1512 - } - }`` - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - predictions (Iterable[dict]): iterable of dictionaries representing predictions - validate (bool): a flag indicating if there should be a validation - if `predictions` is valid ndjson - Returns: - BulkImportRequest object - """ - if not isinstance(predictions, list): - raise TypeError( - f"annotations must be in a form of Iterable. Found {type(predictions)}" - ) - ndjson_predictions = serialize_labels(predictions) - - if validate: - _validate_ndjson(ndjson_predictions, client.get_project(project_id)) - - data_str = parser.dumps(ndjson_predictions) - if not data_str: - raise ValueError("annotations cannot be empty") - - data = data_str.encode("utf-8") - file_name = _make_file_name(project_id, name) - request_data = _make_request_data( - project_id, name, len(data_str), file_name - ) - file_data = (file_name, data, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, - request_data=request_data, - file_name=file_name, - file_data=file_data, - ) - - return cls(client, response_data["createBulkImportRequest"]) - - @classmethod - def create_from_local_file( - cls, client, project_id: str, name: str, file: Path, validate_file=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a local ndjson file with predictions. 
- - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - file (Path): local ndjson file with predictions - validate_file (bool): a flag indicating if there should be a validation - if `file` is a valid ndjson file - Returns: - BulkImportRequest object - - """ - file_name = _make_file_name(project_id, name) - content_length = file.stat().st_size - request_data = _make_request_data( - project_id, name, content_length, file_name - ) - - with file.open("rb") as f: - if validate_file: - reader = parser.reader(f) - # ensure that the underlying json load call is valid - # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53 - # by iterating through the file so we only store - # each line in memory rather than the entire file - try: - _validate_ndjson(reader, client.get_project(project_id)) - except ValueError: - raise ValueError(f"{file} is not a valid ndjson file") - else: - f.seek(0) - file_data = (file.name, f, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, request_data, file_name, file_data - ) - return cls(client, response_data["createBulkImportRequest"]) - - def delete(self) -> None: - """Deletes the import job and also any annotations created by this import. - - Returns: - None - """ - id_param = "bulk_request_id" - query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) { - deleteBulkImportRequest(where: {id: $%s}) { - id - name - } - }""" % (id_param, id_param) - self.client.execute(query_str, {id_param: self.uid}) - - -def _validate_ndjson( - lines: Iterable[Dict[str, Any]], project: "Project" -) -> None: - """ - Client side validation of an ndjson object. - - Does not guarentee that an upload will succeed for the following reasons: - * We are not checking the data row types which will cause the following errors to slip through - * Missing frame indices will not causes an error for videos - * Uploaded annotations for the wrong data type will pass (Eg. entity on images) - * We are not checking bounds of an asset (Eg. frame index, image height, text location) - - Args: - lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines - project (Project): id of project for which predictions will be imported - - Raises: - MALValidationError: Raise for invalid NDJson - UuidError: Duplicate UUID in upload - """ - feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas( - project.ontology() - ) - uids: Set[str] = set() - for idx, line in enumerate(lines): - try: - annotation = NDAnnotation(**line) - annotation.validate_instance( - feature_schemas_by_id, feature_schemas_by_name - ) - uuid = str(annotation.uuid) - if uuid in uids: - raise lb_exceptions.UuidError( - f"{uuid} already used in this import job, " - "must be unique for the project." - ) - uids.add(uuid) - except (ValidationError, ValueError, TypeError, KeyError) as e: - raise lb_exceptions.MALValidationError( - f"Invalid NDJson on line {idx}" - ) from e - - -# The rest of this file contains objects for MAL validation -def parse_classification(tool): - """ - Parses a classification from an ontology. 
Only radio, checklist, and text are supported for MAL
-
-    Args:
-        tool (dict)
-
-    Returns:
-        dict
-    """
-    if tool["type"] in ["radio", "checklist"]:
-        option_schema_ids = [r["featureSchemaId"] for r in tool["options"]]
-        option_names = [r["value"] for r in tool["options"]]
-        return {
-            "tool": tool["type"],
-            "featureSchemaId": tool["featureSchemaId"],
-            "name": tool["name"],
-            "options": [*option_schema_ids, *option_names],
-        }
-    elif tool["type"] == "text":
-        return {
-            "tool": tool["type"],
-            "name": tool["name"],
-            "featureSchemaId": tool["featureSchemaId"],
-        }
-
-
-def get_mal_schemas(ontology):
-    """
-    Converts a project ontology to a dict for easier lookup during ndjson validation
-
-    Args:
-        ontology (Ontology)
-    Returns:
-        Dict, Dict : Useful for looking up a tool from a given feature schema id or name
-    """
-
-    valid_feature_schemas_by_schema_id = {}
-    valid_feature_schemas_by_name = {}
-    for tool in ontology.normalized["tools"]:
-        classifications = [
-            parse_classification(classification_tool)
-            for classification_tool in tool["classifications"]
-        ]
-        classifications_by_schema_id = {
-            v["featureSchemaId"]: v for v in classifications
-        }
-        classifications_by_name = {v["name"]: v for v in classifications}
-        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = {
-            "tool": tool["tool"],
-            "classificationsBySchemaId": classifications_by_schema_id,
-            "classificationsByName": classifications_by_name,
-            "name": tool["name"],
-        }
-        valid_feature_schemas_by_name[tool["name"]] = {
-            "tool": tool["tool"],
-            "classificationsBySchemaId": classifications_by_schema_id,
-            "classificationsByName": classifications_by_name,
-            "name": tool["name"],
-        }
-    for tool in ontology.normalized["classifications"]:
-        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = (
-            parse_classification(tool)
-        )
-        valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool)
-    return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name
-
-
-class Bbox(BaseModel):
-    top: float
-    left: float
-    height: float
-    width: float
-
-
-class Point(BaseModel):
-    x: float
-    y: float
-
-
-class FrameLocation(BaseModel):
-    end: int
-    start: int
-
-
-class VideoSupported(BaseModel):
-    # Note that frames are only allowed as top level inferences for video
-    frames: Optional[List[FrameLocation]] = None
-
-
-# Base class for a special kind of union.
-class SpecialUnion:
-    def __new__(cls, **kwargs):
-        return cls.build(kwargs)
-
-    @classmethod
-    def __get_validators__(cls):
-        yield cls.build
-
-    @classmethod
-    def get_union_types(cls):
-        if not issubclass(cls, SpecialUnion):
-            raise TypeError(f"{cls} must be a subclass of SpecialUnion")
-
-        union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")]
-        if len(union_types) < 1:
-            raise TypeError(
-                f"Class {cls} should inherit from a union of objects to build"
-            )
-        if len(union_types) > 1:
-            raise TypeError(
-                f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}"
-            )
-        return union_types[0].__args__[0].__args__
-
-    @classmethod
-    def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase":
-        """
-        Checks through all objects in the union to see which matches the input data.
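-
-        A member matches only when every one of its "determinant" fields is
-        present in the input; if several members match, the one with the most
-        determinant fields is chosen.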
- Args: - data (Union[dict, BaseModel]) : The data for constructing one of the objects in the union - raises: - KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion - ValidationError: Error while trying to construct a specific object in the union - - """ - if isinstance(data, BaseModel): - data = data.model_dump() - - top_level_fields = [] - max_match = 0 - matched = None - - for type_ in cls.get_union_types(): - determinate_fields = _determinants(type_) - top_level_fields.append(determinate_fields) - matches = sum([val in determinate_fields for val in data]) - if matches == len(determinate_fields) and matches > max_match: - max_match = matches - matched = type_ - - if matched is not None: - # These two have the exact same top level keys - if matched in [NDRadio, NDText]: - if isinstance(data["answer"], dict): - matched = NDRadio - elif isinstance(data["answer"], str): - matched = NDText - else: - raise TypeError( - f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict" - ) - return matched(**data) - else: - raise KeyError( - f"Invalid annotation. Must have one of the following keys : {top_level_fields}. Found {data}." - ) - - @classmethod - def schema(cls): - results = {"definitions": {}} - for cl in cls.get_union_types(): - schema = cl.schema() - results["definitions"].update(schema.pop("definitions")) - results[cl.__name__] = schema - return results - - -class DataRow(BaseModel): - id: str - - -class NDFeatureSchema(BaseModel): - schemaId: Optional[str] = None - name: Optional[str] = None - - @model_validator(mode="after") - def most_set_one(self): - if self.schemaId is None and self.name is None: - raise ValueError( - "Must set either schemaId or name for all feature schemas" - ) - return self - - -class NDBase(NDFeatureSchema): - ontology_type: str - uuid: UUID - dataRow: DataRow - model_config = ConfigDict(extra="forbid") - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - if self.name: - if self.name not in valid_feature_schemas_by_name: - raise ValueError( - f"Name {self.name} is not valid for the provided project's ontology." - ) - - if ( - self.ontology_type - != valid_feature_schemas_by_name[self.name]["tool"] - ): - raise ValueError( - f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}" - ) - - if self.schemaId: - if self.schemaId not in valid_feature_schemas_by_id: - raise ValueError( - f"Schema id {self.schemaId} is not valid for the provided project's ontology." 
- ) - - if ( - self.ontology_type - != valid_feature_schemas_by_id[self.schemaId]["tool"] - ): - raise ValueError( - f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}" - ) - - def validate_instance( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - self.validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - - -###### Classifications ###### - - -class NDText(NDBase): - ontology_type: Literal["text"] = "text" - answer: str = Field(json_schema_extra={"determinant": True}) - # No feature schema to check - - -class NDChecklist(VideoSupported, NDBase): - ontology_type: Literal["checklist"] = "checklist" - answers: List[NDFeatureSchema] = Field( - json_schema_extra={"determinant": True} - ) - - @field_validator("answers", mode="before") - def validate_answers(cls, value, field): - # constr not working with mypy. - if not len(value): - raise ValueError("Checklist answers should not be empty") - return value - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - # Test top level feature schema for this tool - super(NDChecklist, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - # Test the feature schemas provided to the answer field - if len( - set([answer.name or answer.schemaId for answer in self.answers]) - ) != len(self.answers): - raise ValueError( - f"Duplicated featureSchema found for checklist {self.uuid}" - ) - for answer in self.answers: - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if answer.name not in options and answer.schemaId not in options: - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {answer}" - ) - - -class NDRadio(VideoSupported, NDBase): - ontology_type: Literal["radio"] = "radio" - answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True}) - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDRadio, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if ( - self.answer.name not in options - and self.answer.schemaId not in options - ): - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {self.answer.name or self.answer.schemaId}" - ) - - -# A union with custom construction logic to improve error messages -class NDClassification( - SpecialUnion, - Type[Union[NDText, NDRadio, NDChecklist]], # type: ignore -): ... 
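
A minimal usage sketch for the union above (editorial, not part of the removed
module; the ids are reused from the `create_from_objects` docstring example,
and the answer string is made up): a str `answer` resolves to NDText, while a
dict `answer` would resolve to NDRadio.

    ann = NDClassification(
        answer="a free-form text answer",
        schemaId="ckappz7d700gn0zbocmqkwd9i",
        uuid="9fd9a92e-2560-4e77-81d4-b2e955800092",
        dataRow={"id": "ck1s02fqxm8fi0757f0e6qtdc"},
    )
    assert isinstance(ann, NDText)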
- - -###### Tools ###### - - -class NDBaseTool(NDBase): - classifications: List[NDClassification] = [] - - # This is indepdent of our problem - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDBaseTool, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - for classification in self.classifications: - classification.validate_feature_schemas( - valid_feature_schemas_by_name[self.name][ - "classificationsBySchemaId" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsBySchemaId" - ], - valid_feature_schemas_by_name[self.name][ - "classificationsByName" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsByName" - ], - ) - - @field_validator("classifications", mode="before") - def validate_subclasses(cls, value, field): - # Create uuid and datarow id so we don't have to define classification objects twice - # This is caused by the fact that we require these ids for top level classifications but not for subclasses - results = [] - dummy_id = "child".center(25, "_") - for row in value: - results.append( - {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())} - ) - return results - - -class NDPolygon(NDBaseTool): - ontology_type: Literal["polygon"] = "polygon" - polygon: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("polygon") - def is_geom_valid(cls, v): - if len(v) < 3: - raise ValueError( - f"A polygon must have at least 3 points to be valid. Found {v}" - ) - return v - - -class NDPolyline(NDBaseTool): - ontology_type: Literal["line"] = "line" - line: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("line") - def is_geom_valid(cls, v): - if len(v) < 2: - raise ValueError( - f"A line must have at least 2 points to be valid. Found {v}" - ) - return v - - -class NDRectangle(NDBaseTool): - ontology_type: Literal["rectangle"] = "rectangle" - bbox: Bbox = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class NDPoint(NDBaseTool): - ontology_type: Literal["point"] = "point" - point: Point = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class EntityLocation(BaseModel): - start: int - end: int - - -class NDTextEntity(NDBaseTool): - ontology_type: Literal["named-entity"] = "named-entity" - location: EntityLocation = Field(json_schema_extra={"determinant": True}) - - @field_validator("location") - def is_valid_location(cls, v): - if isinstance(v, BaseModel): - v = v.model_dump() - - if len(v) < 2: - raise ValueError( - f"A line must have at least 2 points to be valid. Found {v}" - ) - if v["start"] < 0: - raise ValueError(f"Text location must be positive. Found {v}") - if v["start"] > v["end"]: - raise ValueError( - f"Text start location must be less or equal than end. Found {v}" - ) - return v - - -class RLEMaskFeatures(BaseModel): - counts: List[int] - size: List[int] - - @field_validator("counts") - def validate_counts(cls, counts): - if not all([count >= 0 for count in counts]): - raise ValueError( - "Found negative value for counts. They should all be zero or positive" - ) - return counts - - @field_validator("size") - def validate_size(cls, size): - if len(size) != 2: - raise ValueError( - f"Mask `size` should have two ints representing height and with. 
Found : {size}"
-            )
-        if not all([count > 0 for count in size]):
-            raise ValueError(
-                f"Mask `size` should be a positive int. Found : {size}"
-            )
-        return size
-
-
-class PNGMaskFeatures(BaseModel):
-    # base64 encoded png bytes
-    png: str
-
-
-class URIMaskFeatures(BaseModel):
-    instanceURI: str
-    colorRGB: Union[List[int], Tuple[int, int, int]]
-
-    @field_validator("colorRGB")
-    def validate_color(cls, colorRGB):
-        # Does the dtype matter? Can it be a float?
-        if not isinstance(colorRGB, (tuple, list)):
-            raise ValueError(
-                f"Received color that is not a list or tuple. Found : {colorRGB}"
-            )
-        elif len(colorRGB) != 3:
-            raise ValueError(
-                f"Must provide RGB values for segmentation colors. Found : {colorRGB}"
-            )
-        elif not all([0 <= color <= 255 for color in colorRGB]):
-            raise ValueError(
-                f"All rgb colors must be between 0 and 255. Found : {colorRGB}"
-            )
-        return colorRGB
-
-
-class NDMask(NDBaseTool):
-    ontology_type: Literal["superpixel"] = "superpixel"
-    mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field(
-        json_schema_extra={"determinant": True}
-    )
-
-
-# A union with custom construction logic to improve error messages
-class NDTool(
-    SpecialUnion,
-    Type[  # type: ignore
-        Union[
-            NDMask,
-            NDTextEntity,
-            NDPoint,
-            NDRectangle,
-            NDPolyline,
-            NDPolygon,
-        ]
-    ],
-): ...
-
-
-class NDAnnotation(
-    SpecialUnion,
-    Type[Union[NDTool, NDClassification]],  # type: ignore
-):
-    @classmethod
-    def build(cls: Any, data) -> "NDBase":
-        if not isinstance(data, dict):
-            raise ValueError("value must be dict")
-        errors = []
-        for cl in cls.get_union_types():
-            try:
-                return cl(**data)
-            except KeyError as e:
-                errors.append(f"{cl.__name__}: {e}")
-
-        raise ValueError(
-            "Unable to construct any annotation.\n{}".format("\n".join(errors))
-        )
-
-    @classmethod
-    def schema(cls):
-        data = {"definitions": {}}
-        for type_ in cls.get_union_types():
-            schema_ = type_.schema()
-            data["definitions"].update(schema_.pop("definitions"))
-            data[type_.__name__] = schema_
-        return data
diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py
index 6f8aebc58..dfc87c8a4 100644
--- a/libs/labelbox/src/labelbox/schema/enums.py
+++ b/libs/labelbox/src/labelbox/schema/enums.py
@@ -1,31 +1,6 @@
 from enum import Enum
 
 
-class BulkImportRequestState(Enum):
-    """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
-
-    If you are not using MEA, continue using BulkImportRequest.
-    AnnotationImports are in beta and will change soon.
-
-    .. list-table::
-       :widths: 15 150
-       :header-rows: 1
-
-       * - State
-         - Description
-       * - RUNNING
-         - Indicates that the import job is not done yet.
-       * - FAILED
-         - Indicates the import job failed. Check `BulkImportRequest.errors` for more information
-       * - FINISHED
-         - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information
-    """
-
-    RUNNING = "RUNNING"
-    FAILED = "FAILED"
-    FINISHED = "FINISHED"
-
-
 class AnnotationImportState(Enum):
     """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
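
A short sketch of the replacement path for the removed enum (assumed from the
current SDK surface; `client`, `project`, and `labels` are hypothetical): new
code polls AnnotationImportState on a LabelImport rather than
BulkImportRequestState on a BulkImportRequest.

    from labelbox.schema.annotation_import import LabelImport
    from labelbox.schema.enums import AnnotationImportState

    task = LabelImport.create_from_objects(
        client, project.uid, "label-import-demo", labels
    )
    task.wait_until_done()
    assert task.state == AnnotationImportState.FINISHED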
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f8876f7c4..f2de4db5e 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -5,36 +5,29 @@ import warnings from collections import namedtuple from datetime import datetime, timezone -from pathlib import Path from typing import ( TYPE_CHECKING, Any, Dict, - Iterable, List, Optional, Tuple, - TypeVar, Union, overload, ) -from urllib.parse import urlparse from labelbox.schema.labeling_service import ( LabelingService, LabelingServiceStatus, ) from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -import requests -from labelbox import parser from labelbox import utils from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, - ResourceConflict, ResourceNotFoundError, ) from labelbox.orm import query @@ -46,7 +39,6 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, - validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -63,7 +55,6 @@ from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, - OntologyKind, UploadType, ) from labelbox.schema.project_overview import ( @@ -72,7 +63,7 @@ ) if TYPE_CHECKING: - from labelbox import BulkImportRequest + pass DataRowPriority = int @@ -579,7 +570,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - f"This function has only been tested to work with the Editor front end. Found %s", + "This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -814,7 +805,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - f"Batch exceeds max size, break into smaller batches" + "Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1088,8 +1079,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Batch was not created successfully: " - + json.dumps(task.errors) + "Batch was not created successfully: " + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) @@ -1436,7 +1426,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Priority was not updated successfully: " + "Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1488,33 +1478,6 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] - def bulk_import_requests(self) -> PaginatedCollection: - """Returns bulk import request objects which are used in model-assisted labeling. - These are returned with the oldest first, and most recent last. - """ - - id_param = "project_id" - query_str = """query ListAllImportRequestsPyApi($%s: ID!) 
{ - bulkImportRequests ( - where: { projectId: $%s } - skip: %%d - first: %%d - ) { - %s - } - }""" % ( - id_param, - id_param, - query.results_query_part(Entity.BulkImportRequest), - ) - return PaginatedCollection( - self.client, - query_str, - {id_param: str(self.uid)}, - ["bulkImportRequests"], - Entity.BulkImportRequest, - ) - def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1629,7 +1592,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Data rows were not moved successfully: " + "Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1639,77 +1602,6 @@ def _wait_for_task(self, task_id: str) -> Task: return task - def upload_annotations( - self, - name: str, - annotations: Union[str, Path, Iterable[Dict]], - validate: bool = False, - ) -> "BulkImportRequest": # type: ignore - """Uploads annotations to a new Editor project. - - Args: - name (str): name of the BulkImportRequest job - annotations (str or Path or Iterable): - url that is publicly accessible by Labelbox containing an - ndjson file - OR local path to an ndjson file - OR iterable of annotation rows - validate (bool): - Whether or not to validate the payload before uploading. - Returns: - BulkImportRequest - """ - - if isinstance(annotations, str) or isinstance(annotations, Path): - - def _is_url_valid(url: Union[str, Path]) -> bool: - """Verifies that the given string is a valid url. - - Args: - url: string to be checked - Returns: - True if the given url is valid otherwise False - - """ - if isinstance(url, Path): - return False - parsed = urlparse(url) - return bool(parsed.scheme) and bool(parsed.netloc) - - if _is_url_valid(annotations): - return Entity.BulkImportRequest.create_from_url( - client=self.client, - project_id=self.uid, - name=name, - url=str(annotations), - validate=validate, - ) - else: - path = Path(annotations) - if not path.exists(): - raise FileNotFoundError( - f"{annotations} is not a valid url nor existing local file" - ) - return Entity.BulkImportRequest.create_from_local_file( - client=self.client, - project_id=self.uid, - name=name, - file=path, - validate_file=validate, - ) - elif isinstance(annotations, Iterable): - return Entity.BulkImportRequest.create_from_objects( - client=self.client, - project_id=self.uid, - name=name, - predictions=annotations, # type: ignore - validate=validate, - ) - else: - raise ValueError( - f"Invalid annotations given of type: {type(annotations)}" - ) - def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py deleted file mode 100644 index 9abae1422..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ /dev/null @@ -1,258 +0,0 @@ -from unittest.mock import patch -import uuid -from labelbox import parser, Project -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -import pytest -import random -from labelbox.data.annotation_types.annotation import ObjectAnnotation -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - ClassificationAnnotation, - ClassificationAnswer, - Radio, -) -from labelbox.data.annotation_types.data.video import VideoData -from 
labelbox.data.annotation_types.geometry.point import Point -from labelbox.data.annotation_types.geometry.rectangle import ( - Rectangle, - RectangleUnit, -) -from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.data.text import TextData -from labelbox.data.annotation_types.ner import ( - DocumentEntity, - DocumentTextSelection, -) -from labelbox.data.annotation_types.video import VideoObjectAnnotation - -from labelbox.data.serialization import NDJsonConverter -from labelbox.exceptions import MALValidationError, UuidError -from labelbox.schema.bulk_import_request import BulkImportRequest -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.annotation_import import LabelImport, MALPredictionImport -from labelbox.schema.media_type import MediaType - -""" -- Here we only want to check that the uploads are calling the validation -- Then with unit tests we can check the types of errors raised -""" -# TODO: remove library once bulk import requests are removed - - -@pytest.mark.order(1) -def test_create_from_url(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_file(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - with pytest.raises(MALValidationError): - module_project.upload_annotations( - name=name, annotations=url, validate=True - ) - # Schema ids shouldn't match - - -def test_create_from_objects( - module_project: Project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_create_from_label_objects( - module_project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - labels = list(NDJsonConverter.deserialize(predictions)) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=labels - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - normalized_predictions = list(NDJsonConverter.serialize(labels)) - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, normalized_predictions - ) - - -def test_create_from_local_file( - tmp_path, predictions, module_project, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - file_name = f"{name}.ndjson" - file_path = tmp_path / 
file_name - with file_path.open("w") as f: - parser.dump(predictions, f) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=str(file_path), validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_get(client, module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - bulk_import_request = BulkImportRequest.from_name( - client, project_id=module_project.uid, name=name - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_ndjson(tmp_path, module_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - -def test_validate_ndjson_uuid(tmp_path, module_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - uid = str(uuid.uuid4()) - repeat_uuid[0]["uuid"] = uid - repeat_uuid[1]["uuid"] = uid - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - -@pytest.mark.skip( - "Slow test and uses a deprecated api endpoint for annotation imports" -) -def test_wait_till_done(rectangle_inference, project): - name = str(uuid.uuid4()) - url = project.client.upload_data( - content=parser.dumps(rectangle_inference), sign=True - ) - bulk_import_request = project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert len(bulk_import_request.inputs) == 1 - bulk_import_request.wait_until_done() - assert bulk_import_request.state == BulkImportRequestState.FINISHED - - # Check that the status files are being returned as expected - assert len(bulk_import_request.errors) == 0 - assert len(bulk_import_request.inputs) == 1 - assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] - assert len(bulk_import_request.statuses) == 1 - assert bulk_import_request.statuses[0]["status"] == "SUCCESS" - assert ( - bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] - ) - - -def test_project_bulk_import_requests(module_project, predictions): - result = module_project.bulk_import_requests() - assert len(list(result)) == 0 - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - 
name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - result = module_project.bulk_import_requests() - assert len(list(result)) == 3 - - -def test_delete(module_project, predictions): - name = str(uuid.uuid4()) - - bulk_import_requests = module_project.bulk_import_requests() - [ - bulk_import_request.delete() - for bulk_import_request in bulk_import_requests - ] - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 1 - - bulk_import_request.delete() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index a0df559fc..9e8963a26 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,24 +1,8 @@ -from labelbox.schema.media_type import MediaType -from labelbox.schema.project import Project import pytest - -from labelbox import parser -from pytest_cases import parametrize, fixture_ref +from pytest_cases import fixture_ref, parametrize from labelbox.exceptions import MALValidationError -from labelbox.schema.bulk_import_request import ( - NDChecklist, - NDClassification, - NDMask, - NDPolygon, - NDPolyline, - NDRadio, - NDRectangle, - NDText, - NDTextEntity, - NDTool, - _validate_ndjson, -) +from labelbox.schema.media_type import MediaType """ - These NDlabels are apart of bulkImportReqeust and should be removed once bulk import request is removed @@ -191,39 +175,6 @@ def test_missing_feature_schema(module_project, rectangle_inference): _validate_ndjson([pred], module_project) -def test_validate_ndjson(tmp_path, configured_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - configured_project.upload_annotations( - name="name", annotations=str(file_path), validate=True - ) - - -def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - repeat_uuid[0]["uuid"] = "test_uuid" - repeat_uuid[1]["uuid"] = "test_uuid" - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - @pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) def test_video_upload(video_checklist_inference, configured_project): pred = video_checklist_inference[0].copy() diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json deleted file mode 100644 index 4de15e217..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json +++ /dev/null @@ -1,54 
+0,0 @@ -[ - { - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "confidence": 0.8, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" - }, - { - "answer": [ - { - "schemaId": "ckrb1sfl8099e0y919v260awv", - "confidence": 0.82, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } - ], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json deleted file mode 100644 index 83a95e5bf..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json +++ /dev/null @@ -1,25 +0,0 @@ -[{ - "location": { - "start": 67, - "end": 128 - }, - "messageId": "some-message-id", - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] -}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 91563b8ae..75fe36e44 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,16 +8,17 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - } + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + }, + "classifications": [] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -28,20 +29,17 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + }, + "classifications": [] }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -50,762 +48,39 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 
935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 
1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 + "x": 10.0, + "y": 20.0 }, { - "x": 1119, - "y": 934 + "x": 15.0, + "y": 20.0 }, { - "x": 1118, - "y": 935 + "x": 20.0, + "y": 25.0 }, { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json deleted file mode 100644 index 591e40cf6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json +++ /dev/null @@ -1,823 +0,0 @@ -[ - { - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "schemaId": "ckrazcueb16og0z6609jj7y3y", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.851, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - { - "uuid": 
"751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "schemaId": "ckrazcuec16ok0z66f956apb7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.834, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } - }, - { - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "schemaId": "ckrazcuec16oi0z66dzrd8pfl", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.986, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "polygon": [ - { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - 
"x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 - } - ] - }, - { - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "schemaId": 
"ckrazcuec16om0z66bhhh4tp7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "point": { - "x": 2122, - "y": 1457 - } - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 82be4cdab..466a03594 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,826 +1,86 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "box a", + "name": "ckrazcueb16og0z6609jj7y3y", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "confidence": 0.854, + "classifications": [], + "confidence": 0.851, "customMetrics": [ { "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.7 + "value": 0.4 } - ] + ], + "bbox": { + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "mask a", + "name": "ckrazcuec16ok0z66f956apb7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - }, - "confidence": 0.685, + "classifications": [], + "confidence": 0.834, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 - }, - { - "name": "customMetric2", - "value": 0.9 + "value": 0.3 } - ] + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + } }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "polygon a", + "name": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.71, + "confidence": 0.986, "customMetrics": [ { "name": "customMetric1", - "value": 0.1 + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 
929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 + "x": 10.0, + "y": 20.0 }, { - "x": 1099, - "y": 911 + "x": 15.0, + "y": 20.0 }, { - "x": 1100, - "y": 911 + "x": 20.0, + "y": 25.0 }, { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, 
- "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "point a", + "name": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.77, - "customMetrics": [ - { - "name": "customMetric2", - "value": 1.2 - } - ], "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json deleted file mode 100644 index 31be5a4c7..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "aggregation": "ARITHMETIC_MEAN", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "metricValue": 0.1 - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json deleted file mode 100644 index f4b4894f6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json +++ /dev/null @@ -1,155 +0,0 @@ -[{ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 4, - "unit": "POINTS", - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 162.73, - "left": 32.45, - "height": 388.16999999999996, - "width": 101.66000000000001 - } -}, { - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "bbox": { - "top": 223.26, - "left": 251.42, - "height": 457.03999999999996, - "width": 186.78 - } -}, { - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", 
- "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 6, - "unit": "POINTS", - "confidence": 0.99, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 32.52, - "left": 218.17, - "height": 231.73, - "width": 110.56000000000003 - } -}, { - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "confidence": 0.89, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 117.39, - "left": 4.25, - "height": 456.9200000000001, - "width": 164.83 - } -}, { - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 8, - "unit": "POINTS", - "bbox": { - "top": 82.13, - "left": 217.28, - "height": 279.76, - "width": 82.43000000000004 - } -}, { - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 3, - "unit": "POINTS", - "bbox": { - "top": 298.12, - "left": 83.34, - "height": 203.83000000000004, - "width": 0.37999999999999545 - } -}, -{ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "named_entity", - "classifications": [], - "textSelections": [ - { - "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - "tokenIds": [ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c" - ], - "page": 1 - } - ] -} -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json deleted file mode 100644 index d6a9eecbd..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json +++ /dev/null @@ -1,36 +0,0 @@ -[ - { - "line": [ - { - "x": 2534.353, - "y": 249.471 - }, - { - "x": 2429.492, - "y": 182.092 - }, - { - "x": 2294.322, - "y": 221.962 - } - ], - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-line", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.58, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json deleted file mode 100644 index 1f26d8dc8..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "location": { - "start": 67, - "end": 128 - }, - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": 
"05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json deleted file mode 100644 index 11e0753d9..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json +++ /dev/null @@ -1,166 +0,0 @@ -[{ - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" - }, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{ - "start": 30, - "end": 35 - }, { - "start": 50, - "end": 51 - }] -}, { - "answer": [{ - "schemaId": "ckrb1sfl8099e0y919v260awv" - }], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{ - "start": 0, - "end": 5 - }] -}, { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" -}, { - "classifications": [], - "schemaId": - "cl5islwg200gfci6g0oitaypu", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [{ - "keyframes": [{ - "frame": 1, - "line": [{ - "x": 10.0, - "y": 10.0 - }, { - "x": 100.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }, { - "frame": 5, - "line": [{ - "x": 15.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 8, - "line": [{ - "x": 100.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [{ - "keyframes": [{ - "frame": 1, - "point": { - "x": 10.0, - "y": 10.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 5, - "point": { - "x": 50.0, - "y": 50.0 - }, - "classifications": [] - }, { - "frame": 10, - "point": { - "x": 10.0, - "y": 50.0 - }, - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5iw0roz00lwci6g5jni62vs", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [{ - "keyframes": [{ - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0 - }, - "classifications": [] - }, { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0 - }, - "classifications": [] - }] - }] -}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 0bc3c8924..59f568c75 100644 --- 
a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py
@@ -37,13 +37,6 @@ def test_serialization_min():
     res.pop("uuid")
     assert res == expected
 
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    for i, annotation in enumerate(res.annotations):
-        annotation.extra.pop("uuid")
-        assert annotation.value == label.annotations[i].value
-        assert annotation.name == label.annotations[i].name
-
 
 def test_serialization_with_classification():
     label = Label(
@@ -134,12 +127,6 @@ def test_serialization_with_classification():
     res.pop("uuid")
     assert res == expected
 
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    assert label.model_dump(exclude_none=True) == label.model_dump(
-        exclude_none=True
-    )
-
 
 def test_serialization_with_classification_double_nested():
     label = Label(
@@ -233,13 +220,6 @@ def test_serialization_with_classification_double_nested():
     res.pop("uuid")
     assert res == expected
 
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    res.annotations[0].extra.pop("uuid")
-    assert label.model_dump(exclude_none=True) == label.model_dump(
-        exclude_none=True
-    )
-
 
 def test_serialization_with_classification_double_nested_2():
     label = Label(
@@ -330,9 +310,3 @@ def test_serialization_with_classification_double_nested_2():
     res = next(serialized)
     res.pop("uuid")
     assert res == expected
-
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    assert label.model_dump(exclude_none=True) == label.model_dump(
-        exclude_none=True
-    )
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
index 8dcb17f0b..82adce99c 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
@@ -1,15 +1,73 @@
 import json
+from labelbox.data.annotation_types.classification.classification import (
+    Checklist,
+    Radio,
+    Text,
+)
+from labelbox.data.annotation_types.data.generic_data_row_data import (
+    GenericDataRowData,
+)
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
+from labelbox.types import (
+    Label,
+    ClassificationAnnotation,
+    ClassificationAnswer,
+)
+from labelbox.data.mixins import CustomMetric
+
 
 def test_classification():
     with open(
         "tests/data/assets/ndjson/classification_import.json", "r"
     ) as file:
         data = json.load(file)
-    res = list(NDJsonConverter.deserialize(data))
-    res = list(NDJsonConverter.serialize(res))
+
+    label = Label(
+        data=GenericDataRowData(
+            uid="ckrb1sf1i1g7i0ybcdc6oc8ct",
+        ),
+        annotations=[
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfjx099a0y914hl319ie",
+                extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"},
+                value=Radio(
+                    answer=ClassificationAnswer(
+                        custom_metrics=[
+                            CustomMetric(name="customMetric1", value=0.5),
+                            CustomMetric(name="customMetric2", value=0.3),
+                        ],
+                        confidence=0.8,
+                        feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb",
+                    ),
+                ),
+            ),
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
+                extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"},
+                value=Checklist(
+                    answer=[
+                        ClassificationAnswer(
+                            custom_metrics=[
+                                CustomMetric(name="customMetric1", value=0.5),
+                                CustomMetric(name="customMetric2", value=0.3),
+                            ],
+                            confidence=0.82,
+                            feature_schema_id="ckrb1sfl8099e0y919v260awv",
+                        )
+                    ],
+                ),
+            ),
+
ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data @@ -18,6 +76,48 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + label = Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + name="classification a", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="choice 1", + ), + ), + ), + ClassificationAnnotation( + name="classification b", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.945, + name="choice 2", + ) + ], + ), + ), + ClassificationAnnotation( + name="classification c", + extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index f7da9181b..561f9ce86 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,8 +1,12 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -99,25 +103,62 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - deserialized_label = list(NDJsonConverter().deserialize(ndjson)) - deserialized_label[0].annotations[0].extra.pop("uuid") - assert deserialized_label[0].model_dump(exclude_none=True) == label[ - 0 - ].model_dump(exclude_none=True) +def test_conversation_entity_import(): + with open( + "tests/data/assets/ndjson/conversation_entity_import.json", "r" + ) as file: + data = json.load(file) -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/conversation_entity_import.json", + label = lb_types.Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) + assert res == data + + +def test_conversation_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - ], -) 
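A detail the rewritten conversation tests below depend on: serialize() mints a random uuid for any annotation that does not supply one, so an exact comparison against a fixture would be flaky. Every reconstructed annotation in this patch therefore pins the uuid through extra. A small sketch using the conversation-entity values from the fixture:

import labelbox.types as lb_types

annotation = lb_types.ObjectAnnotation(
    name="some-text-entity",
    # uuid pinned via `extra`; left unset, serialize() would generate a random
    # one and the `assert res == data` at the end of each test would not be
    # deterministic.
    extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"},
    value=lb_types.ConversationEntity(
        start=67, end=128, message_id="some-message-id"
    ),
)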
-def test_conversation_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + label = lb_types.Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, extra={}, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py index 333c00250..999e1bda5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py @@ -1,67 +1,29 @@ -from copy import copy -import pytest import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import ( - NDDicomSegments, - NDDicomSegment, - NDDicomLine, -) - -""" -Data gen prompt test data -""" - -prompt_text_annotation = lb_types.PromptClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - name="test", - value=lb_types.PromptText( - answer="the answer to the text questions right here" - ), -) - -prompt_text_ndjson = { - "answer": "the answer to the text questions right here", - "name": "test", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, -} - -data_gen_label = lb_types.Label( - data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - annotations=[prompt_text_annotation], -) - -""" -Prompt annotation test -""" def test_serialize_label(): - serialized_label = next(NDJsonConverter().serialize([data_gen_label])) - # Remove uuid field since this is a random value that can not be specified also meant for relationships - del serialized_label["uuid"] - assert serialized_label == prompt_text_ndjson - - -def test_deserialize_label(): - deserialized_label = next( - NDJsonConverter().deserialize([prompt_text_ndjson]) + prompt_text_annotation = lb_types.PromptClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + name="test", + extra={"uuid": "test"}, + value=lb_types.PromptText( + answer="the answer to the text questions right here" + ), ) - if hasattr(deserialized_label.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to match - deserialized_label.annotations[0].extra = {} - assert deserialized_label.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + prompt_text_ndjson = { + "answer": "the answer to the text questions right here", + "name": "test", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "test", + } + + data_gen_label = lb_types.Label( + data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + annotations=[prompt_text_annotation], + ) + serialized_label = next(NDJsonConverter().serialize([data_gen_label])) -def test_serialize_deserialize_label(): - serialized = list(NDJsonConverter.serialize([data_gen_label])) - deserialized = next(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to 
match - deserialized.annotations[0].extra = {} - assert deserialized.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + assert serialized_label == prompt_text_ndjson diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 633214367..762891aa2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,6 +1,5 @@ from copy import copy import pytest -import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -181,28 +180,3 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson - - -@pytest.mark.parametrize("label, ndjson", labels_ndjsons) -def test_deserialize_label(label, ndjson): - deserialized_label = next(NDJsonConverter().deserialize([ndjson])) - if hasattr(deserialized_label.annotations[0], "extra"): - deserialized_label.annotations[0].extra = {} - for i, annotation in enumerate(deserialized_label.annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value - - -@pytest.mark.parametrize("label", labels) -def test_serialize_deserialize_label(label): - serialized = list(NDJsonConverter.serialize([label])) - deserialized = list(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized[0].annotations[0], "extra"): - deserialized[0].annotations[0].extra = {} - for i, annotation in enumerate(deserialized[0].annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index 5fe6a9789..a0897ad9f 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,6 +1,19 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + RectangleUnit, + Point, + DocumentRectangle, + DocumentEntity, + DocumentTextSelection, +) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -53,10 +66,144 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + 
unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.89, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_with_name_only(): @@ -65,26 +212,135 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id=None, + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ 
+ "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.74, + name="boxy", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson - - -def test_pdf_bbox_deserialize(): - deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) - deserialized[0].annotations[0].extra = {} - assert ( - deserialized[0].annotations[0].value - == bbox_labels[0].annotations[0].value - ) - assert ( - deserialized[0].annotations[0].name - == bbox_labels[0].annotations[0].name - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 4adcd9935..1ab678cde 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,8 +9,6 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=VideoData( uid="cklr9mr4m5iao0rb6cvxu4qbn", - file_path=None, - frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -22,6 +20,7 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", + "uuid": 
"b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, value=Rectangle( extra={}, @@ -588,31 +587,4 @@ def test_serialize_video_objects(): serialized_labels = NDJsonConverter.serialize([label]) label = next(serialized_labels) - manual_label = video_serialized_bbox_label() - - for key in label.keys(): - # ignore uuid because we randomize if there was none - if key != "uuid": - assert label[key] == manual_label[key] - - assert len(label["segments"]) == 2 - assert len(label["segments"][0]["keyframes"]) == 2 - assert len(label["segments"][1]["keyframes"]) == 4 - - # #converts back only the keyframes. should be the sum of all prev segments - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - assert len(label.annotations) == 6 - - -def test_confidence_is_ignored(): - label = video_bbox_label() - serialized_labels = NDJsonConverter.serialize([label]) - label = next(serialized_labels) - label["confidence"] = 0.453 - label["segments"][0]["confidence"] = 0.453 - - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - for annotation in label.annotations: - assert annotation.confidence is None + assert label == video_serialized_bbox_label() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 84c017497..349be13a8 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -34,16 +34,6 @@ def test_serialization(): assert res["answer"] == "text_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "text_answer" - assert annotation_value.confidence == 0.5 - def test_nested_serialization(): label = Label( @@ -102,19 +92,3 @@ def test_nested_serialization(): assert sub_classification["name"] == "nested answer" assert sub_classification["answer"] == "nested answer" assert sub_classification["confidence"] == 0.7 - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - answer = annotation.value.answer[0] - assert answer.confidence == 0.9 - assert answer.name == "first_answer" - - classification_answer = answer.classifications[0].value.answer - assert classification_answer.confidence == 0.8 - assert classification_answer.name == "first_sub_radio_answer" - - sub_classification_answer = classification_answer.classifications[0].value - assert type(sub_classification_answer) is Text - assert sub_classification_answer.answer == "nested answer" - assert sub_classification_answer.confidence == 0.7 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py index 2b3fa7f8c..d104a691e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py @@ -1,73 +1,74 @@ -import json -import pytest - -from labelbox.data.serialization.ndjson.classification import NDRadio - +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import NDLine 
- - -def round_dict(data): - if isinstance(data, dict): - for key in data: - if isinstance(data[key], float): - data[key] = int(data[key]) - elif isinstance(data[key], dict): - data[key] = round_dict(data[key]) - elif isinstance(data[key], (list, tuple)): - data[key] = [round_dict(r) for r in data[key]] +from labelbox.types import ( + Label, + ClassificationAnnotation, + Radio, + ClassificationAnswer, +) - return data +def test_generic_data_row_global_key_included(): + expected = [ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + } + ] -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/classification_import_global_key.json", - "tests/data/assets/ndjson/metric_import_global_key.json", - "tests/data/assets/ndjson/polyline_import_global_key.json", - "tests/data/assets/ndjson/text_entity_import_global_key.json", - "tests/data/assets/ndjson/conversation_entity_import_global_key.json", - ], -) -def test_many_types(filename: str): - with open(filename, "r") as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == data - f.close() + label = Label( + data=GenericDataRowData( + global_key="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ) + res = list(NDJsonConverter.serialize([label])) -def test_image(): - with open( - "tests/data/assets/ndjson/image_import_global_key.json", "r" - ) as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() + assert res == expected -def test_pdf(): - with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() +def test_dict_data_row_global_key_included(): + expected = [ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + } + ] + label = Label( + data={ + "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct", + }, + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ) -def test_video(): - with open( - "tests/data/assets/ndjson/video_import_global_key.json", "r" - ) as f: - data = json.load(f) + res = list(NDJsonConverter.serialize([label])) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] - f.close() + assert res == expected diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index 1729e1f46..d67acb9c3 100644 
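As the two tests above demonstrate, Label's data= accepts either the GenericDataRowData model or a plain dict with the same fields, and both serialize to an identical dataRow globalKey reference. A condensed sketch (assuming annotations may be omitted and defaults to an empty list):

from labelbox.data.annotation_types.data.generic_data_row_data import (
    GenericDataRowData,
)
from labelbox.types import Label

# Both forms yield {"dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}}
# in the serialized NDJSON.
label_a = Label(data=GenericDataRowData(global_key="ckrb1sf1i1g7i0ybcdc6oc8ct"))
label_b = Label(data={"global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct"})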
--- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,4 +1,8 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -10,6 +14,7 @@ ImageData, MaskData, ) +from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -29,12 +34,74 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_image_with_name_only(): @@ -43,11 +110,74 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + name="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + name="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + 
url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + name="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + name="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_mask(): @@ -57,10 +187,11 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], + "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -68,16 +199,54 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [255, 0, 0], + "colorRGB": (255, 0, 0), }, + "classifications": [], }, ] - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) + mask_numpy = mask_numpy.astype(np.uint8) + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.8, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Mask( + mask=MaskData(arr=mask_numpy), + color=(1, 1, 1), + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + extra={}, + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=(255, 0, 0), + ), + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + + assert res == data def test_mask_from_arr(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 45c5c67bf..40e098405 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,38 +1,166 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from 
labelbox.data.annotation_types.metrics.confusion_matrix import ( + ConfusionMatrixMetric, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ScalarMetric, + ScalarMetricAggregation, + ConfusionMatrixAggregation, +) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert reserialized == data + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + assert res == data def test_custom_scalar_metric(): - with open( - "tests/data/assets/ndjson/custom_scalar_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: 0.1, 0.2: 0.5}, + "metricName": "custom_iou", + "aggregation": "SUM", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value=0.1, + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value={"0.1": 0.1, "0.2": 0.5}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert res == data def test_custom_confusion_matrix_metric(): - with open( - "tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (1, 1, 2, 3), + "metricName": "50%_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (0, 1, 2, 5), + "metricName": "50%_iou", + "featureName": "sample_class", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 
4, 3)}, + "metricName": "50%_iou", + "aggregation": "CONFUSION_MATRIX", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ConfusionMatrixMetric( + value=(1, 1, 2, 3), + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value=(0, 1, 2, 5), + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert data == res diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 69594ff73..202f793fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,32 +1,125 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest from labelbox.data.serialization import NDJsonConverter +from labelbox.types import ( + Label, + MessageEvaluationTaskAnnotation, + MessageSingleSelectionTask, + MessageMultiSelectionTask, + MessageInfo, + OrderedMessageInfo, + MessageRankingTask, +) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - deserialized = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(deserialized)) + labels = [ + Label( + data=GenericDataRowData( + uid="cnjencjencjfencvj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="single-selection", + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + value=MessageSingleSelectionTask( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + parent_message_id="clxfznjb800073b6v43ppx9ca", + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cfcerfvergerfefj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="multi-selection", + extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, + value=MessageMultiSelectionTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + selected_messages=[ + MessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + ) + ], + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=2, + ), + ], + ), + ) + ], + ), + ] - assert data == reserialized + res = 
list(NDJsonConverter.serialize(labels)) + assert res == data -def test_mesage_ranking_task_wrong_order_serialization(): - with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: - data = json.load(file) - - some_ranking_task = next( - task - for task in data - if task["messageEvaluationTask"]["format"] == "message-ranking" - ) - some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ - "order" - ] = 3 +def test_mesage_ranking_task_wrong_order_serialization(): with pytest.raises(ValueError): - list(NDJsonConverter.deserialize([some_ranking_task])) + ( + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={ + "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" + }, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + ], + ), + ) + ], + ), + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py deleted file mode 100644 index 790bd87b3..000000000 --- a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from labelbox.data.serialization.ndjson.label import NDLabel -from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle -import pytest - - -def test_bad_annotation_input(): - data = [{"test": 3}] - with pytest.raises(ValueError): - NDLabel(**{"annotations": data}) - - -def test_correct_annotation_input(): - with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: - data = json.load(f) - assert isinstance( - NDLabel(**{"annotations": [data[0]]}).annotations[0], - NDDocumentRectangle, - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index e0f0df0e6..3633c9cbe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,13 +1,135 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Rectangle, + Point, + ClassificationAnnotation, + Radio, + ClassificationAnswer, + Text, + Checklist, +) def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + 
name="customMetric2", value=0.3 + ), + ], + confidence=0.34, + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "5d03213e-4408-456c-9eca-cf0723202961", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.894, + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={}, + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -16,6 +138,112 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="box a", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.811, + name="first answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box b", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification b", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.815, + name="second answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification c", + value=Checklist( + answer=[ + ClassificationAnswer( + name="third answer", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), 
+ end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="a string", + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index 97d48a14e..cd11d97fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,18 +1,76 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ObjectAnnotation, Point, Line, Label -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/polyline_without_confidence_import.json", - "tests/data/assets/ndjson/polyline_import.json", - ], -) -def test_polyline_import(filename: str): - with open(filename, "r") as file: +def test_polyline_import_with_confidence(): + with open( + "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" + ) as file: + data = json.load(file) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data + + +def test_polyline_import_without_confidence(): + with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.58, + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index bd80f9267..4458e335c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,4 +1,3 @@ -import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -40,14 +39,6 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - for i, annotation in enumerate(res.annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def 
test_serialization_with_radio_classification(): label = Label( @@ -101,10 +92,3 @@ def test_serialization_with_radio_classification(): res = next(serialized) res.pop("uuid") assert res == expected - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - res.annotations[0].extra.pop("uuid") - assert res.annotations[0].model_dump( - exclude_none=True - ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 66630dbb5..0e42ab152 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,6 +1,10 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -8,8 +12,26 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="bbox", + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + }, + value=Rectangle( + start=Point(x=38.0, y=28.0), + end=Point(x=81.0, y=69.0), + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -39,8 +61,6 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, }, ) @@ -48,8 +68,9 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + + assert res == data def test_rectangle_mixed_start_end_points(): @@ -76,17 +97,13 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, - }, + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + assert res == data def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index f33719035..235b66957 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,16 +1,135 @@ import json -from uuid import uuid4 -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Point, + Rectangle, + RelationshipAnnotation, + Relationship, +) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = 
list(NDJsonConverter.deserialize(data)) + res = [ + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + extra={}, + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl-DIFFERENT", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + ] res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -44,29 +163,3 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] - - -def test_relationship_nonexistent_object(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - relationship_annotation = data[2] - source_uuid = relationship_annotation["relationship"]["source"] - target_uuid = str(uuid4()) - relationship_annotation["relationship"]["target"] = target_uuid - error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" - - with pytest.raises(ValueError, match=error_msg): - list(NDJsonConverter.deserialize(data)) - - -def test_relationship_duplicate_uuids(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - source, target = data[0], data[1] - target["uuid"] = source["uuid"] - error_msg = f"UUID '{source['uuid']}' is not unique" - - with pytest.raises(AssertionError, match=error_msg): - list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index d5e81c51a..21db389cb 100644 --- 
a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,7 +1,5 @@ from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( - ClassificationAnswer, - Radio, Text, ) from labelbox.data.annotation_types.data.text import TextData @@ -34,11 +32,3 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index 3e856f001..fb93f15d4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,21 +1,68 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, TextEntity + + +def test_text_entity_import(): + with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: + data = json.load(file) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/text_entity_import.json", +def test_text_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - ], -) -def test_text_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index c7a6535c4..4fba5c2ca 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,10 +1,10 @@ import json -from labelbox.client import Client from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, Radio, + Text, 
) from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.geometry.line import Line @@ -13,8 +13,10 @@ from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.video import VideoObjectAnnotation -from labelbox import parser +from labelbox.data.annotation_types.video import ( + VideoClassificationAnnotation, + VideoObjectAnnotation, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter from operator import itemgetter @@ -24,15 +26,275 @@ def test_video(): with open("tests/data/assets/ndjson/video_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=1, + ), + 
VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=5, + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + 
frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_name_only(): @@ -40,16 +302,274 @@ def test_video_name_only(): "tests/data/assets/ndjson/video_import_name_only.json", "r" ) as file: data = json.load(file) - - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=1, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], 
+ ), + frame=5, + ), + ClassificationAnnotation( + name="question 3", + extra={"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_classification_global_subclassifications(): @@ -67,7 +587,6 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( - name="checklist", answer=[ ClassificationAnswer( name="first_checklist_answer", @@ -94,7 +613,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = nested_checklist_annotation_ndjson = { + expected_second_annotation = { "name": "nested_checklist_question", "answer": [ { @@ -116,12 +635,6 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - assert annotation.name == label.annotations[i].name - def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -287,14 +800,6 @@ def 
test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_video_classification_point(): bbox_annotation = [ @@ -445,13 +950,6 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - def test_video_classification_frameline(): bbox_annotation = [ @@ -619,9 +1117,289 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value + +[ + { + "answer": "a value", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 5, "start": 0}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5islwg200gfci6g0oitaypu", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + { + "classifications": [], + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + } + ] + }, + ], + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + } + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + }, + { + "classifications": [], + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + }, + ] + }, + ], + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "segments": [ + { + "keyframes": [ + { + "bbox": { + "height": 100.0, + "left": 5.0, + "top": 10.0, + "width": 150.0, + }, + "classifications": [], + "frame": 1, + }, + { + "bbox": { + "height": 50.0, + "left": 5.0, + "top": 30.0, + "width": 150.0, + }, + "classifications": [], + "frame": 5, + 
}, + ] + }, + { + "keyframes": [ + { + "bbox": { + "height": 400.0, + "left": 200.0, + "top": 300.0, + "width": 150.0, + }, + "classifications": [], + "frame": 10, + } + ] + }, + ], + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + }, +] + +[ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{"start": 0, "end": 5}], + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "classifications": [], + "schemaId": "cl5islwg200gfci6g0oitaypu", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + { + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + } + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + "classifications": [], + } + ] + }, + { + "keyframes": [ + { + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + "classifications": [], + }, + { + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + "classifications": [], + }, + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0, + }, + "classifications": [], + }, + { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0, + }, + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0, + }, + "classifications": [], + } + ] + }, + ], + }, +] From 2c0c6773f8abdac0928a325d75f709892a92a13d Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 18 Sep 2024 09:22:02 -0700 Subject: [PATCH 05/44] Revert "Vb/merge 5.0.0 (#1826)" (#1827) --- libs/labelbox/src/labelbox/__init__.py | 1 + libs/labelbox/src/labelbox/orm/model.py | 1 + libs/labelbox/src/labelbox/schema/__init__.py | 21 +- .../labelbox/schema/bulk_import_request.py | 1004 +++++++++++++++++ libs/labelbox/src/labelbox/schema/enums.py | 25 + libs/labelbox/src/labelbox/schema/project.py | 120 +- .../test_bulk_import_request.py | 258 +++++ .../test_ndjson_validation.py | 53 +- .../classification_import_global_key.json | 54 + ...conversation_entity_import_global_key.json | 25 + .../data/assets/ndjson/image_import.json | 779 ++++++++++++- 
.../ndjson/image_import_global_key.json | 823 ++++++++++++++ .../assets/ndjson/image_import_name_only.json | 810 ++++++++++++- .../ndjson/metric_import_global_key.json | 10 + .../assets/ndjson/pdf_import_global_key.json | 155 +++ .../ndjson/polyline_import_global_key.json | 36 + .../ndjson/text_entity_import_global_key.json | 26 + .../ndjson/video_import_global_key.json | 166 +++ .../serialization/ndjson/test_checklist.py | 26 + .../ndjson/test_classification.py | 108 +- .../serialization/ndjson/test_conversation.py | 71 +- .../serialization/ndjson/test_data_gen.py | 80 +- .../data/serialization/ndjson/test_dicom.py | 26 + .../serialization/ndjson/test_document.py | 294 +---- .../ndjson/test_export_video_objects.py | 32 +- .../serialization/ndjson/test_free_text.py | 26 + .../serialization/ndjson/test_global_key.py | 125 +- .../data/serialization/ndjson/test_image.py | 203 +--- .../data/serialization/ndjson/test_metric.py | 170 +-- .../data/serialization/ndjson/test_mmc.py | 125 +- .../ndjson/test_ndlabel_subclass_matching.py | 19 + .../data/serialization/ndjson/test_nested.py | 236 +--- .../serialization/ndjson/test_polyline.py | 82 +- .../data/serialization/ndjson/test_radio.py | 16 + .../serialization/ndjson/test_rectangle.py | 43 +- .../serialization/ndjson/test_relationship.py | 151 +-- .../data/serialization/ndjson/test_text.py | 10 + .../serialization/ndjson/test_text_entity.py | 69 +- .../data/serialization/ndjson/test_video.py | 868 +------------- 39 files changed, 4767 insertions(+), 2380 deletions(-) create mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py create mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py create mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json create mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index f9b82b422..5b5ac1f67 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -6,6 +6,7 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig +from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( MALPredictionImport, MEAPredictionImport, diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py index 1f3ee1d86..84dcac774 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -386,6 +386,7 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] + BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: 
Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index e57c04a29..03327e0d1 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,28 +1,29 @@ -import labelbox.schema.annotation_import import labelbox.schema.asset_attachment -import labelbox.schema.batch +import labelbox.schema.bulk_import_request +import labelbox.schema.annotation_import import labelbox.schema.benchmark -import labelbox.schema.catalog import labelbox.schema.data_row -import labelbox.schema.data_row_metadata import labelbox.schema.dataset -import labelbox.schema.iam_integration -import labelbox.schema.identifiable -import labelbox.schema.identifiables import labelbox.schema.invite import labelbox.schema.label import labelbox.schema.labeling_frontend import labelbox.schema.labeling_service -import labelbox.schema.media_type import labelbox.schema.model import labelbox.schema.model_run import labelbox.schema.ontology -import labelbox.schema.ontology_kind import labelbox.schema.organization import labelbox.schema.project -import labelbox.schema.project_overview import labelbox.schema.review import labelbox.schema.role import labelbox.schema.task import labelbox.schema.user import labelbox.schema.webhook +import labelbox.schema.data_row_metadata +import labelbox.schema.batch +import labelbox.schema.iam_integration +import labelbox.schema.media_type +import labelbox.schema.identifiables +import labelbox.schema.identifiable +import labelbox.schema.catalog +import labelbox.schema.ontology_kind +import labelbox.schema.project_overview diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py new file mode 100644 index 000000000..8e11f3261 --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/bulk_import_request.py @@ -0,0 +1,1004 @@ +import json +import time +from uuid import UUID, uuid4 +import functools + +import logging +from pathlib import Path +from google.api_core import retry +from labelbox import parser +import requests +from pydantic import ( + ValidationError, + BaseModel, + Field, + field_validator, + model_validator, + ConfigDict, + StringConstraints, +) +from typing_extensions import Literal, Annotated +from typing import ( + Any, + List, + Optional, + BinaryIO, + Dict, + Iterable, + Tuple, + Union, + Type, + Set, + TYPE_CHECKING, +) + +from labelbox import exceptions as lb_exceptions +from labelbox import utils +from labelbox.orm import query +from labelbox.orm.db_object import DbObject +from labelbox.orm.model import Relationship +from labelbox.schema.enums import BulkImportRequestState +from labelbox.schema.serialization import serialize_labels +from labelbox.orm.model import Field as lb_Field + +if TYPE_CHECKING: + from labelbox import Project + from labelbox.types import Label + +NDJSON_MIME_TYPE = "application/x-ndjson" +logger = logging.getLogger(__name__) + +# TODO: Deprecate this library in place of labelimport and malprediction import library. 
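+# Minimal usage sketch (illustrative only; the project id, import name, and
+# predictions below are placeholders, and the client is assumed to be
+# configured with a valid API key):
+#
+#     from labelbox import Client, BulkImportRequest
+#     from labelbox.schema.enums import BulkImportRequestState
+#
+#     client = Client()  # or Client(api_key="...")
+#     request = BulkImportRequest.create_from_objects(
+#         client,
+#         project_id="<project-id>",
+#         name="example-import",
+#         predictions=[...],  # ndjson-style dicts; see create_from_objects
+#     )
+#     request.wait_until_done()
+#     assert request.state == BulkImportRequestState.FINISHED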
+
+
+def _determinants(parent_cls: Any) -> List[str]:
+    return [
+        k
+        for k, v in parent_cls.model_fields.items()
+        if v.json_schema_extra and "determinant" in v.json_schema_extra
+    ]
+
+
+def _make_file_name(project_id: str, name: str) -> str:
+    return f"{project_id}__{name}.ndjson"
+
+
+# TODO(gszpak): move it to client.py
+def _make_request_data(
+    project_id: str, name: str, content_length: int, file_name: str
+) -> dict:
+    query_str = """mutation createBulkImportRequestFromFilePyApi(
+            $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) {
+        createBulkImportRequest(data: {
+            projectId: $projectId,
+            name: $name,
+            filePayload: {
+                file: $file,
+                contentLength: $contentLength
+            }
+        }) {
+            %s
+        }
+    }
+    """ % query.results_query_part(BulkImportRequest)
+    variables = {
+        "projectId": project_id,
+        "name": name,
+        "file": None,
+        "contentLength": content_length,
+    }
+    operations = json.dumps({"variables": variables, "query": query_str})
+
+    return {
+        "operations": operations,
+        "map": (None, json.dumps({file_name: ["variables.file"]})),
+    }
+
+
+def _send_create_file_command(
+    client,
+    request_data: dict,
+    file_name: str,
+    file_data: Tuple[str, Union[bytes, BinaryIO], str],
+) -> dict:
+    response = client.execute(data=request_data, files={file_name: file_data})
+
+    if not response.get("createBulkImportRequest", None):
+        # fall back to the singular "error" key when "errors" is absent
+        raise lb_exceptions.LabelboxError(
+            "Failed to create BulkImportRequest, message: %s"
+            % (response.get("errors", None) or response.get("error", None))
+        )
+
+    return response
+
+
+class BulkImportRequest(DbObject):
+    """Represents the import job when importing annotations.
+
+    Attributes:
+        name (str)
+        state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job)
+        input_file_url (str): URL to your web-hosted NDJSON file
+        error_file_url (str): NDJSON that contains error messages for failed annotations
+        status_file_url (str): NDJSON that contains status for each annotation
+        created_at (datetime): UTC timestamp for date BulkImportRequest was created
+
+        project (Relationship): `ToOne` relationship to Project
+        created_by (Relationship): `ToOne` relationship to User
+    """
+
+    name = lb_Field.String("name")
+    state = lb_Field.Enum(BulkImportRequestState, "state")
+    input_file_url = lb_Field.String("input_file_url")
+    error_file_url = lb_Field.String("error_file_url")
+    status_file_url = lb_Field.String("status_file_url")
+    created_at = lb_Field.DateTime("created_at")
+
+    project = Relationship.ToOne("Project")
+    created_by = Relationship.ToOne("User", False, "created_by")
+
+    @property
+    def inputs(self) -> List[Dict[str, Any]]:
+        """
+        Inputs for each individual annotation uploaded.
+        This should match the ndjson annotations that you have uploaded.
+
+        Returns:
+            Uploaded ndjson.
+
+        * This information will expire after 24 hours.
+        """
+        return self._fetch_remote_ndjson(self.input_file_url)
+
+    @property
+    def errors(self) -> List[Dict[str, Any]]:
+        """
+        Errors for each individual annotation uploaded. This is a subset of statuses
+
+        Returns:
+            List of dicts containing error messages. Empty list means there were no errors
+            See `BulkImportRequest.statuses` for more details.
+
+        * This information will expire after 24 hours.
+        """
+        self.wait_until_done()
+        return self._fetch_remote_ndjson(self.error_file_url)
+
+    @property
+    def statuses(self) -> List[Dict[str, Any]]:
+        """
+        Status for each individual annotation uploaded.
+
+        Returns:
+            A status for each annotation if the upload is done running.
+            See below table for more details
+
+            .. list-table::
+               :widths: 15 150
+               :header-rows: 1
+
+               * - Field
+                 - Description
+               * - uuid
+                 - Specifies the annotation for the status row.
+               * - dataRow
+                 - JSON object containing the Labelbox data row ID for the annotation.
+               * - status
+                 - Indicates SUCCESS or FAILURE.
+               * - errors
+                 - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info.
+
+        * This information will expire after 24 hours.
+        """
+        self.wait_until_done()
+        return self._fetch_remote_ndjson(self.status_file_url)
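+
+    # For reference, a single status row from `statuses` has this shape
+    # (fields per the table above; values illustrative):
+    #   {"uuid": "...", "dataRow": {"id": "..."}, "status": "SUCCESS", "errors": []}
+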
+    @functools.lru_cache()
+    def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]:
+        """
+        Fetches the remote ndjson file and caches the results.
+
+        Args:
+            url (str): Can be any url pointing to an ndjson file.
+        Returns:
+            ndjson as a list of dicts.
+        """
+        response = requests.get(url)
+        response.raise_for_status()
+        return parser.loads(response.text)
+
+    def refresh(self) -> None:
+        """Synchronizes values of all fields with the database."""
+        query_str, params = query.get_single(BulkImportRequest, self.uid)
+        res = self.client.execute(query_str, params)
+        res = res[utils.camel_case(BulkImportRequest.type_name())]
+        self._set_field_values(res)
+
+    def wait_till_done(self, sleep_time_seconds: int = 5) -> None:
+        self.wait_until_done(sleep_time_seconds)
+
+    def wait_until_done(self, sleep_time_seconds: int = 5) -> None:
+        """Blocks import job until certain conditions are met.
+
+        Blocks until the BulkImportRequest.state changes either to
+        `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`,
+        periodically refreshing object's state.
+
+        Args:
+            sleep_time_seconds (int): a time to block between subsequent API calls
+        """
+        while self.state == BulkImportRequestState.RUNNING:
+            logger.info(f"Sleeping for {sleep_time_seconds} seconds...")
+            time.sleep(sleep_time_seconds)
+            self.__exponential_backoff_refresh()
+
+    @retry.Retry(
+        predicate=retry.if_exception_type(
+            lb_exceptions.ApiLimitError,
+            lb_exceptions.TimeoutError,
+            lb_exceptions.NetworkError,
+        )
+    )
+    def __exponential_backoff_refresh(self) -> None:
+        self.refresh()
+
+    @classmethod
+    def from_name(
+        cls, client, project_id: str, name: str
+    ) -> "BulkImportRequest":
+        """Fetches existing BulkImportRequest.
+
+        Args:
+            client (Client): a Labelbox client
+            project_id (str): BulkImportRequest's project id
+            name (str): name of BulkImportRequest
+        Returns:
+            BulkImportRequest object
+
+        """
+        query_str = """query getBulkImportRequestPyApi(
+                $projectId: ID!, $name: String!) {
+            bulkImportRequest(where: {
+                projectId: $projectId,
+                name: $name
+            }) {
+                %s
+            }
+        }
+        """ % query.results_query_part(cls)
+        params = {"projectId": project_id, "name": name}
+        response = client.execute(query_str, params=params)
+        return cls(client, response["bulkImportRequest"])
+    @classmethod
+    def create_from_url(
+        cls, client, project_id: str, name: str, url: str, validate=True
+    ) -> "BulkImportRequest":
+        """
+        Creates a BulkImportRequest from a publicly accessible URL
+        to an ndjson file with predictions.
+
+        Args:
+            client (Client): a Labelbox client
+            project_id (str): id of project for which predictions will be imported
+            name (str): name of BulkImportRequest
+            url (str): publicly accessible URL pointing to ndjson file containing predictions
+            validate (bool): a flag indicating if there should be a validation
+                if `url` is valid ndjson
+        Returns:
+            BulkImportRequest object
+        """
+        if validate:
+            logger.warn(
+                "Validation is turned on. The file will be downloaded locally and processed before uploading."
+            )
+            res = requests.get(url)
+            data = parser.loads(res.text)
+            _validate_ndjson(data, client.get_project(project_id))
+
+        query_str = """mutation createBulkImportRequestPyApi(
+                $projectId: ID!, $name: String!, $fileUrl: String!) {
+            createBulkImportRequest(data: {
+                projectId: $projectId,
+                name: $name,
+                fileUrl: $fileUrl
+            }) {
+                %s
+            }
+        }
+        """ % query.results_query_part(cls)
+        params = {"projectId": project_id, "name": name, "fileUrl": url}
+        bulk_import_request_response = client.execute(query_str, params=params)
+        return cls(
+            client, bulk_import_request_response["createBulkImportRequest"]
+        )
+
+    @classmethod
+    def create_from_objects(
+        cls,
+        client,
+        project_id: str,
+        name: str,
+        predictions: Union[Iterable[Dict], Iterable["Label"]],
+        validate=True,
+    ) -> "BulkImportRequest":
+        """
+        Creates a `BulkImportRequest` from an iterable of dictionaries.
+
+        Conforms to JSON predictions format, e.g.:
+        ``{
+            "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
+            "schemaId": "ckappz7d700gn0zbocmqkwd9i",
+            "dataRow": {
+                "id": "ck1s02fqxm8fi0757f0e6qtdc"
+            },
+            "bbox": {
+                "top": 48,
+                "left": 58,
+                "height": 865,
+                "width": 1512
+            }
+        }``
+
+        Args:
+            client (Client): a Labelbox client
+            project_id (str): id of project for which predictions will be imported
+            name (str): name of BulkImportRequest
+            predictions (Iterable[dict]): iterable of dictionaries representing predictions
+            validate (bool): a flag indicating if there should be a validation
+                if `predictions` is valid ndjson
+        Returns:
+            BulkImportRequest object
+        """
+        if not isinstance(predictions, list):
+            raise TypeError(
+                f"annotations must be in a form of Iterable. Found {type(predictions)}"
+            )
+        ndjson_predictions = serialize_labels(predictions)
+
+        if validate:
+            _validate_ndjson(ndjson_predictions, client.get_project(project_id))
+
+        data_str = parser.dumps(ndjson_predictions)
+        if not data_str:
+            raise ValueError("annotations cannot be empty")
+
+        data = data_str.encode("utf-8")
+        file_name = _make_file_name(project_id, name)
+        request_data = _make_request_data(
+            project_id, name, len(data_str), file_name
+        )
+        file_data = (file_name, data, NDJSON_MIME_TYPE)
+        response_data = _send_create_file_command(
+            client,
+            request_data=request_data,
+            file_name=file_name,
+            file_data=file_data,
+        )
+
+        return cls(client, response_data["createBulkImportRequest"])
+
+        Args:
+            client (Client): a Labelbox client
+            project_id (str): id of project for which predictions will be imported
+            name (str): name of BulkImportRequest
+            file (Path): local ndjson file with predictions
+            validate_file (bool): a flag indicating whether to validate that
+                `file` is a valid ndjson file
+        Returns:
+            BulkImportRequest object
+
+        """
+        file_name = _make_file_name(project_id, name)
+        content_length = file.stat().st_size
+        request_data = _make_request_data(
+            project_id, name, content_length, file_name
+        )
+
+        with file.open("rb") as f:
+            if validate_file:
+                reader = parser.reader(f)
+                # ensure that the underlying json load call is valid
+                # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
+                # by iterating through the file so we only store
+                # each line in memory rather than the entire file
+                try:
+                    _validate_ndjson(reader, client.get_project(project_id))
+                except ValueError:
+                    raise ValueError(f"{file} is not a valid ndjson file")
+            else:
+                f.seek(0)
+            file_data = (file.name, f, NDJSON_MIME_TYPE)
+            response_data = _send_create_file_command(
+                client, request_data, file_name, file_data
+            )
+        return cls(client, response_data["createBulkImportRequest"])
+
+    def delete(self) -> None:
+        """Deletes the import job and also any annotations created by this import.
+
+        Returns:
+            None
+        """
+        id_param = "bulk_request_id"
+        query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) {
+            deleteBulkImportRequest(where: {id: $%s}) {
+                id
+                name
+            }
+        }""" % (id_param, id_param)
+        self.client.execute(query_str, {id_param: self.uid})
+
+
+def _validate_ndjson(
+    lines: Iterable[Dict[str, Any]], project: "Project"
+) -> None:
+    """
+    Client-side validation of an ndjson object.
+
+    Does not guarantee that an upload will succeed for the following reasons:
+    * We are not checking the data row types which will cause the following errors to slip through
+        * Missing frame indices will not cause an error for videos
+    * Uploaded annotations for the wrong data type will pass (e.g. entity on images)
+    * We are not checking bounds of an asset (e.g. frame index, image height, text location)
+
+    Args:
+        lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines
+        project (Project): project for which predictions will be imported
+
+    Raises:
+        MALValidationError: Raised for invalid NDJson
+        UuidError: Duplicate UUID in upload
+    """
+    feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas(
+        project.ontology()
+    )
+    uids: Set[str] = set()
+    for idx, line in enumerate(lines):
+        try:
+            annotation = NDAnnotation(**line)
+            annotation.validate_instance(
+                feature_schemas_by_id, feature_schemas_by_name
+            )
+            uuid = str(annotation.uuid)
+            if uuid in uids:
+                raise lb_exceptions.UuidError(
+                    f"{uuid} already used in this import job, "
+                    "must be unique for the project."
+                )
+            uids.add(uuid)
+        except (ValidationError, ValueError, TypeError, KeyError) as e:
+            raise lb_exceptions.MALValidationError(
+                f"Invalid NDJson on line {idx}"
+            ) from e
+
+
+# The rest of this file contains objects for MAL validation
+def parse_classification(tool):
+    """
+    Parses a classification from an ontology.
+    Only radio, checklist, and text are supported for MAL.
+
+    Args:
+        tool (dict)
+
+    Returns:
+        dict
+    """
+    if tool["type"] in ["radio", "checklist"]:
+        option_schema_ids = [r["featureSchemaId"] for r in tool["options"]]
+        option_names = [r["value"] for r in tool["options"]]
+        return {
+            "tool": tool["type"],
+            "featureSchemaId": tool["featureSchemaId"],
+            "name": tool["name"],
+            "options": [*option_schema_ids, *option_names],
+        }
+    elif tool["type"] == "text":
+        return {
+            "tool": tool["type"],
+            "name": tool["name"],
+            "featureSchemaId": tool["featureSchemaId"],
+        }
+
+
+def get_mal_schemas(ontology):
+    """
+    Converts a project ontology to a dict for easier lookup during ndjson validation
+
+    Args:
+        ontology (Ontology)
+    Returns:
+        Dict, Dict : Useful for looking up a tool from a given feature schema id or name
+    """
+
+    valid_feature_schemas_by_schema_id = {}
+    valid_feature_schemas_by_name = {}
+    for tool in ontology.normalized["tools"]:
+        classifications = [
+            parse_classification(classification_tool)
+            for classification_tool in tool["classifications"]
+        ]
+        classifications_by_schema_id = {
+            v["featureSchemaId"]: v for v in classifications
+        }
+        classifications_by_name = {v["name"]: v for v in classifications}
+        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = {
+            "tool": tool["tool"],
+            "classificationsBySchemaId": classifications_by_schema_id,
+            "classificationsByName": classifications_by_name,
+            "name": tool["name"],
+        }
+        valid_feature_schemas_by_name[tool["name"]] = {
+            "tool": tool["tool"],
+            "classificationsBySchemaId": classifications_by_schema_id,
+            "classificationsByName": classifications_by_name,
+            "name": tool["name"],
+        }
+    for tool in ontology.normalized["classifications"]:
+        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = (
+            parse_classification(tool)
+        )
+        valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool)
+    return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name
+
+
+class Bbox(BaseModel):
+    top: float
+    left: float
+    height: float
+    width: float
+
+
+class Point(BaseModel):
+    x: float
+    y: float
+
+
+class FrameLocation(BaseModel):
+    end: int
+    start: int
+
+
+class VideoSupported(BaseModel):
+    # Note that frames are only allowed as top level inferences for video
+    frames: Optional[List[FrameLocation]] = None
+
+
+# Base class for a special kind of union.
+class SpecialUnion:
+    def __new__(cls, **kwargs):
+        return cls.build(kwargs)
+
+    @classmethod
+    def __get_validators__(cls):
+        yield cls.build
+
+    @classmethod
+    def get_union_types(cls):
+        if not issubclass(cls, SpecialUnion):
+            raise TypeError(f"{cls} must be a subclass of SpecialUnion")
+
+        union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")]
+        if len(union_types) < 1:
+            raise TypeError(
+                f"Class {cls} should inherit from a union of objects to build"
+            )
+        if len(union_types) > 1:
+            raise TypeError(
+                f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}"
+            )
+        return union_types[0].__args__[0].__args__
+
+    @classmethod
+    def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase":
+        """
+        Checks through all objects in the union to see which matches the input data.
+
+        Args:
+            data (Union[dict, BaseModel]): The data for constructing one of the objects in the union
+        Raises:
+            KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion
+            ValidationError: Error while trying to construct a specific object in the union
+
+        """
+        if isinstance(data, BaseModel):
+            data = data.model_dump()
+
+        top_level_fields = []
+        max_match = 0
+        matched = None
+
+        for type_ in cls.get_union_types():
+            determinate_fields = _determinants(type_)
+            top_level_fields.append(determinate_fields)
+            matches = sum([val in determinate_fields for val in data])
+            if matches == len(determinate_fields) and matches > max_match:
+                max_match = matches
+                matched = type_
+
+        if matched is not None:
+            # These two have the exact same top level keys
+            if matched in [NDRadio, NDText]:
+                if isinstance(data["answer"], dict):
+                    matched = NDRadio
+                elif isinstance(data["answer"], str):
+                    matched = NDText
+                else:
+                    raise TypeError(
+                        f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict"
+                    )
+            return matched(**data)
+        else:
+            raise KeyError(
+                f"Invalid annotation. Must have one of the following keys : {top_level_fields}. Found {data}."
+            )
+
+    @classmethod
+    def schema(cls):
+        results = {"definitions": {}}
+        for cl in cls.get_union_types():
+            schema = cl.schema()
+            results["definitions"].update(schema.pop("definitions"))
+            results[cl.__name__] = schema
+        return results
+
+
+class DataRow(BaseModel):
+    id: str
+
+
+class NDFeatureSchema(BaseModel):
+    schemaId: Optional[str] = None
+    name: Optional[str] = None
+
+    @model_validator(mode="after")
+    def must_set_one(self):
+        if self.schemaId is None and self.name is None:
+            raise ValueError(
+                "Must set either schemaId or name for all feature schemas"
+            )
+        return self
+
+
+class NDBase(NDFeatureSchema):
+    ontology_type: str
+    uuid: UUID
+    dataRow: DataRow
+    model_config = ConfigDict(extra="forbid")
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        if self.name:
+            if self.name not in valid_feature_schemas_by_name:
+                raise ValueError(
+                    f"Name {self.name} is not valid for the provided project's ontology."
+                )
+
+            if (
+                self.ontology_type
+                != valid_feature_schemas_by_name[self.name]["tool"]
+            ):
+                raise ValueError(
+                    f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}"
+                )
+
+        if self.schemaId:
+            if self.schemaId not in valid_feature_schemas_by_id:
+                raise ValueError(
+                    f"Schema id {self.schemaId} is not valid for the provided project's ontology."
+                )
+
+            if (
+                self.ontology_type
+                != valid_feature_schemas_by_id[self.schemaId]["tool"]
+            ):
+                raise ValueError(
+                    f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}"
+                )
+
+    def validate_instance(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        self.validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+
+
+###### Classifications ######
+
+
+class NDText(NDBase):
+    ontology_type: Literal["text"] = "text"
+    answer: str = Field(json_schema_extra={"determinant": True})
+    # No feature schema to check
+
+
+class NDChecklist(VideoSupported, NDBase):
+    ontology_type: Literal["checklist"] = "checklist"
+    answers: List[NDFeatureSchema] = Field(
+        json_schema_extra={"determinant": True}
+    )
+
+    @field_validator("answers", mode="before")
+    def validate_answers(cls, value, field):
+        # constr not working with mypy.
+        if not len(value):
+            raise ValueError("Checklist answers should not be empty")
+        return value
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        # Test top level feature schema for this tool
+        super(NDChecklist, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        # Test the feature schemas provided to the answer field
+        if len(
+            set([answer.name or answer.schemaId for answer in self.answers])
+        ) != len(self.answers):
+            raise ValueError(
+                f"Duplicated featureSchema found for checklist {self.uuid}"
+            )
+        for answer in self.answers:
+            options = (
+                valid_feature_schemas_by_name[self.name]["options"]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId]["options"]
+            )
+            if answer.name not in options and answer.schemaId not in options:
+                raise ValueError(
+                    f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {answer}"
+                )
+
+
+class NDRadio(VideoSupported, NDBase):
+    ontology_type: Literal["radio"] = "radio"
+    answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True})
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        super(NDRadio, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        options = (
+            valid_feature_schemas_by_name[self.name]["options"]
+            if self.name
+            else valid_feature_schemas_by_id[self.schemaId]["options"]
+        )
+        if (
+            self.answer.name not in options
+            and self.answer.schemaId not in options
+        ):
+            raise ValueError(
+                f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {self.answer.name or self.answer.schemaId}"
+            )
+
+
+# A union with custom construction logic to improve error messages
+class NDClassification(
+    SpecialUnion,
+    Type[Union[NDText, NDRadio, NDChecklist]],  # type: ignore
+): ...
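For reference, here is a minimal usage sketch of the union dispatch defined above. It is not part of the patch, and it assumes these classes are importable from labelbox.schema.bulk_import_request; the uuid, data row id, and schema ids are placeholders. `NDClassification.build` picks the candidate whose determinant fields all appear in the payload, then disambiguates NDRadio from NDText by the type of the `answer` value:

    from labelbox.schema.bulk_import_request import (
        NDClassification,
        NDRadio,
        NDText,
    )

    base = {
        "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",  # placeholder uuid
        "dataRow": {"id": "ck1s02fqxm8fi0757f0e6qtdc"},  # placeholder data row id
        "schemaId": "ckappz7d700gn0zbocmqkwd9i",  # placeholder feature schema id
    }

    # A dict-valued answer satisfies NDRadio's determinant field...
    radio = NDClassification(**base, answer={"schemaId": "ckrb1sfl8099e0y919v260awv"})
    assert isinstance(radio, NDRadio)

    # ...while a string-valued answer resolves to NDText.
    text = NDClassification(**base, answer="a value")
    assert isinstance(text, NDText)

Because SpecialUnion.__new__ returns whichever concrete model matched, calling the union class directly yields an instance of the matched member rather than of the union itself.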
+
+
+###### Tools ######
+
+
+class NDBaseTool(NDBase):
+    classifications: List[NDClassification] = []
+
+    # This is independent of our problem
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        super(NDBaseTool, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        for classification in self.classifications:
+            classification.validate_feature_schemas(
+                valid_feature_schemas_by_name[self.name][
+                    "classificationsBySchemaId"
+                ]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId][
+                    "classificationsBySchemaId"
+                ],
+                valid_feature_schemas_by_name[self.name][
+                    "classificationsByName"
+                ]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId][
+                    "classificationsByName"
+                ],
+            )
+
+    @field_validator("classifications", mode="before")
+    def validate_subclasses(cls, value, field):
+        # Create uuid and datarow id so we don't have to define classification objects twice
+        # This is caused by the fact that we require these ids for top level classifications but not for subclasses
+        results = []
+        dummy_id = "child".center(25, "_")
+        for row in value:
+            results.append(
+                {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())}
+            )
+        return results
+
+
+class NDPolygon(NDBaseTool):
+    ontology_type: Literal["polygon"] = "polygon"
+    polygon: List[Point] = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("polygon")
+    def is_geom_valid(cls, v):
+        if len(v) < 3:
+            raise ValueError(
+                f"A polygon must have at least 3 points to be valid. Found {v}"
+            )
+        return v
+
+
+class NDPolyline(NDBaseTool):
+    ontology_type: Literal["line"] = "line"
+    line: List[Point] = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("line")
+    def is_geom_valid(cls, v):
+        if len(v) < 2:
+            raise ValueError(
+                f"A line must have at least 2 points to be valid. Found {v}"
+            )
+        return v
+
+
+class NDRectangle(NDBaseTool):
+    ontology_type: Literal["rectangle"] = "rectangle"
+    bbox: Bbox = Field(json_schema_extra={"determinant": True})
+    # Could check if points are positive
+
+
+class NDPoint(NDBaseTool):
+    ontology_type: Literal["point"] = "point"
+    point: Point = Field(json_schema_extra={"determinant": True})
+    # Could check if points are positive
+
+
+class EntityLocation(BaseModel):
+    start: int
+    end: int
+
+
+class NDTextEntity(NDBaseTool):
+    ontology_type: Literal["named-entity"] = "named-entity"
+    location: EntityLocation = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("location")
+    def is_valid_location(cls, v):
+        if isinstance(v, BaseModel):
+            v = v.model_dump()
+
+        if len(v) < 2:
+            raise ValueError(
+                f"A text location must have both a start and an end. Found {v}"
+            )
+        if v["start"] < 0:
+            raise ValueError(f"Text location must be positive. Found {v}")
+        if v["start"] > v["end"]:
+            raise ValueError(
+                f"Text start location must be less than or equal to the end. Found {v}"
+            )
+        return v
+
+
+class RLEMaskFeatures(BaseModel):
+    counts: List[int]
+    size: List[int]
+
+    @field_validator("counts")
+    def validate_counts(cls, counts):
+        if not all([count >= 0 for count in counts]):
+            raise ValueError(
+                "Found negative value for counts. They should all be zero or positive"
+            )
+        return counts
+
+    @field_validator("size")
+    def validate_size(cls, size):
+        if len(size) != 2:
+            raise ValueError(
+                f"Mask `size` should have two ints representing height and width. Found : {size}"
+            )
+        if not all([count > 0 for count in size]):
+            raise ValueError(
+                f"Mask `size` values should be positive ints. Found : {size}"
+            )
+        return size
+
+
+class PNGMaskFeatures(BaseModel):
+    # base64 encoded png bytes
+    png: str
+
+
+class URIMaskFeatures(BaseModel):
+    instanceURI: str
+    colorRGB: Union[List[int], Tuple[int, int, int]]
+
+    @field_validator("colorRGB")
+    def validate_color(cls, colorRGB):
+        # Does the dtype matter? Can it be a float?
+        if not isinstance(colorRGB, (tuple, list)):
+            raise ValueError(
+                f"Received color that is not a list or tuple. Found : {colorRGB}"
+            )
+        elif len(colorRGB) != 3:
+            raise ValueError(
+                f"Must provide RGB values for segmentation colors. Found : {colorRGB}"
+            )
+        elif not all([0 <= color <= 255 for color in colorRGB]):
+            raise ValueError(
+                f"All rgb colors must be between 0 and 255. Found : {colorRGB}"
+            )
+        return colorRGB
+
+
+class NDMask(NDBaseTool):
+    ontology_type: Literal["superpixel"] = "superpixel"
+    mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field(
+        json_schema_extra={"determinant": True}
+    )
+
+
+# A union with custom construction logic to improve error messages
+class NDTool(
+    SpecialUnion,
+    Type[  # type: ignore
+        Union[
+            NDMask,
+            NDTextEntity,
+            NDPoint,
+            NDRectangle,
+            NDPolyline,
+            NDPolygon,
+        ]
+    ],
+): ...
+
+
+class NDAnnotation(
+    SpecialUnion,
+    Type[Union[NDTool, NDClassification]],  # type: ignore
+):
+    @classmethod
+    def build(cls: Any, data) -> "NDBase":
+        if not isinstance(data, dict):
+            raise ValueError("value must be dict")
+        errors = []
+        for cl in cls.get_union_types():
+            try:
+                return cl(**data)
+            except KeyError as e:
+                errors.append(f"{cl.__name__}: {e}")
+
+        raise ValueError(
+            "Unable to construct any annotation.\n{}".format("\n".join(errors))
+        )
+
+    @classmethod
+    def schema(cls):
+        data = {"definitions": {}}
+        for type_ in cls.get_union_types():
+            schema_ = type_.schema()
+            data["definitions"].update(schema_.pop("definitions"))
+            data[type_.__name__] = schema_
+        return data
diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py
index dfc87c8a4..6f8aebc58 100644
--- a/libs/labelbox/src/labelbox/schema/enums.py
+++ b/libs/labelbox/src/labelbox/schema/enums.py
@@ -1,6 +1,31 @@
 from enum import Enum
 
 
+class BulkImportRequestState(Enum):
+    """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
+
+    If you are not using MEA, continue using BulkImportRequest.
+    AnnotationImports are in beta and will change soon.
+
+    .. list-table::
+       :widths: 15 150
+       :header-rows: 1
+
+       * - State
+         - Description
+       * - RUNNING
+         - Indicates that the import job is not done yet.
+       * - FAILED
+         - Indicates the import job failed. Check `BulkImportRequest.errors` for more information.
+       * - FINISHED
+         - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information.
+    """
+
+    RUNNING = "RUNNING"
+    FAILED = "FAILED"
+    FINISHED = "FINISHED"
+
+
 class AnnotationImportState(Enum):
     """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
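For reference, a sketch of the end-to-end flow these states model, exercised through the Project.upload_annotations method restored in the next hunk. This is not part of the patch; the API key, project id, and prediction ids are placeholders, and the prediction mirrors the docstring example in create_from_objects above:

    from labelbox import Client
    from labelbox.schema.enums import BulkImportRequestState

    client = Client(api_key="<API_KEY>")  # placeholder credentials
    project = client.get_project("<PROJECT_ID>")  # placeholder project id

    predictions = [
        {
            "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
            "schemaId": "ckappz7d700gn0zbocmqkwd9i",
            "dataRow": {"id": "ck1s02fqxm8fi0757f0e6qtdc"},
            "bbox": {"top": 48, "left": 58, "height": 865, "width": 1512},
        }
    ]

    job = project.upload_annotations(name="example-import", annotations=predictions)
    job.wait_until_done(sleep_time_seconds=5)  # polls while state is RUNNING

    if job.state == BulkImportRequestState.FINISHED:
        # statuses holds one row per annotation, as documented above
        failures = [s for s in job.statuses if s["status"] == "FAILURE"]
        print(f"{len(failures)} annotation(s) failed")
    else:  # BulkImportRequestState.FAILED
        print(job.errors)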
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f2de4db5e..f8876f7c4 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -5,29 +5,36 @@ import warnings from collections import namedtuple from datetime import datetime, timezone +from pathlib import Path from typing import ( TYPE_CHECKING, Any, Dict, + Iterable, List, Optional, Tuple, + TypeVar, Union, overload, ) +from urllib.parse import urlparse from labelbox.schema.labeling_service import ( LabelingService, LabelingServiceStatus, ) from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard +import requests +from labelbox import parser from labelbox import utils from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, + ResourceConflict, ResourceNotFoundError, ) from labelbox.orm import query @@ -39,6 +46,7 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, + validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -55,6 +63,7 @@ from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, + OntologyKind, UploadType, ) from labelbox.schema.project_overview import ( @@ -63,7 +72,7 @@ ) if TYPE_CHECKING: - pass + from labelbox import BulkImportRequest DataRowPriority = int @@ -570,7 +579,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - "This function has only been tested to work with the Editor front end. Found %s", + f"This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -805,7 +814,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - "Batch exceeds max size, break into smaller batches" + f"Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1079,7 +1088,8 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Batch was not created successfully: " + json.dumps(task.errors) + f"Batch was not created successfully: " + + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) @@ -1426,7 +1436,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Priority was not updated successfully: " + f"Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1478,6 +1488,33 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] + def bulk_import_requests(self) -> PaginatedCollection: + """Returns bulk import request objects which are used in model-assisted labeling. + These are returned with the oldest first, and most recent last. + """ + + id_param = "project_id" + query_str = """query ListAllImportRequestsPyApi($%s: ID!) 
{ + bulkImportRequests ( + where: { projectId: $%s } + skip: %%d + first: %%d + ) { + %s + } + }""" % ( + id_param, + id_param, + query.results_query_part(Entity.BulkImportRequest), + ) + return PaginatedCollection( + self.client, + query_str, + {id_param: str(self.uid)}, + ["bulkImportRequests"], + Entity.BulkImportRequest, + ) + def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1592,7 +1629,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Data rows were not moved successfully: " + f"Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1602,6 +1639,77 @@ def _wait_for_task(self, task_id: str) -> Task: return task + def upload_annotations( + self, + name: str, + annotations: Union[str, Path, Iterable[Dict]], + validate: bool = False, + ) -> "BulkImportRequest": # type: ignore + """Uploads annotations to a new Editor project. + + Args: + name (str): name of the BulkImportRequest job + annotations (str or Path or Iterable): + url that is publicly accessible by Labelbox containing an + ndjson file + OR local path to an ndjson file + OR iterable of annotation rows + validate (bool): + Whether or not to validate the payload before uploading. + Returns: + BulkImportRequest + """ + + if isinstance(annotations, str) or isinstance(annotations, Path): + + def _is_url_valid(url: Union[str, Path]) -> bool: + """Verifies that the given string is a valid url. + + Args: + url: string to be checked + Returns: + True if the given url is valid otherwise False + + """ + if isinstance(url, Path): + return False + parsed = urlparse(url) + return bool(parsed.scheme) and bool(parsed.netloc) + + if _is_url_valid(annotations): + return Entity.BulkImportRequest.create_from_url( + client=self.client, + project_id=self.uid, + name=name, + url=str(annotations), + validate=validate, + ) + else: + path = Path(annotations) + if not path.exists(): + raise FileNotFoundError( + f"{annotations} is not a valid url nor existing local file" + ) + return Entity.BulkImportRequest.create_from_local_file( + client=self.client, + project_id=self.uid, + name=name, + file=path, + validate_file=validate, + ) + elif isinstance(annotations, Iterable): + return Entity.BulkImportRequest.create_from_objects( + client=self.client, + project_id=self.uid, + name=name, + predictions=annotations, # type: ignore + validate=validate, + ) + else: + raise ValueError( + f"Invalid annotations given of type: {type(annotations)}" + ) + def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py new file mode 100644 index 000000000..9abae1422 --- /dev/null +++ b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py @@ -0,0 +1,258 @@ +from unittest.mock import patch +import uuid +from labelbox import parser, Project +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +import pytest +import random +from labelbox.data.annotation_types.annotation import ObjectAnnotation +from labelbox.data.annotation_types.classification.classification import ( + Checklist, + ClassificationAnnotation, + ClassificationAnswer, + Radio, +) +from labelbox.data.annotation_types.data.video import VideoData +from 
labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.rectangle import ( + Rectangle, + RectangleUnit, +) +from labelbox.data.annotation_types.label import Label +from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.ner import ( + DocumentEntity, + DocumentTextSelection, +) +from labelbox.data.annotation_types.video import VideoObjectAnnotation + +from labelbox.data.serialization import NDJsonConverter +from labelbox.exceptions import MALValidationError, UuidError +from labelbox.schema.bulk_import_request import BulkImportRequest +from labelbox.schema.enums import BulkImportRequestState +from labelbox.schema.annotation_import import LabelImport, MALPredictionImport +from labelbox.schema.media_type import MediaType + +""" +- Here we only want to check that the uploads are calling the validation +- Then with unit tests we can check the types of errors raised +""" +# TODO: remove library once bulk import requests are removed + + +@pytest.mark.order(1) +def test_create_from_url(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_file(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + with pytest.raises(MALValidationError): + module_project.upload_annotations( + name=name, annotations=url, validate=True + ) + # Schema ids shouldn't match + + +def test_create_from_objects( + module_project: Project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_create_from_label_objects( + module_project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + labels = list(NDJsonConverter.deserialize(predictions)) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=labels + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + normalized_predictions = list(NDJsonConverter.serialize(labels)) + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, normalized_predictions + ) + + +def test_create_from_local_file( + tmp_path, predictions, module_project, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + file_name = f"{name}.ndjson" + file_path = tmp_path / 
file_name + with file_path.open("w") as f: + parser.dump(predictions, f) + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=str(file_path), validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_get(client, module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + bulk_import_request = BulkImportRequest.from_name( + client, project_id=module_project.uid, name=name + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_ndjson(tmp_path, module_project): + file_name = f"broken.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + f.write("test") + + with pytest.raises(ValueError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + +def test_validate_ndjson_uuid(tmp_path, module_project, predictions): + file_name = f"repeat_uuid.ndjson" + file_path = tmp_path / file_name + repeat_uuid = predictions.copy() + uid = str(uuid.uuid4()) + repeat_uuid[0]["uuid"] = uid + repeat_uuid[1]["uuid"] = uid + + with file_path.open("w") as f: + parser.dump(repeat_uuid, f) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=repeat_uuid + ) + + +@pytest.mark.skip( + "Slow test and uses a deprecated api endpoint for annotation imports" +) +def test_wait_till_done(rectangle_inference, project): + name = str(uuid.uuid4()) + url = project.client.upload_data( + content=parser.dumps(rectangle_inference), sign=True + ) + bulk_import_request = project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert len(bulk_import_request.inputs) == 1 + bulk_import_request.wait_until_done() + assert bulk_import_request.state == BulkImportRequestState.FINISHED + + # Check that the status files are being returned as expected + assert len(bulk_import_request.errors) == 0 + assert len(bulk_import_request.inputs) == 1 + assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] + assert len(bulk_import_request.statuses) == 1 + assert bulk_import_request.statuses[0]["status"] == "SUCCESS" + assert ( + bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] + ) + + +def test_project_bulk_import_requests(module_project, predictions): + result = module_project.bulk_import_requests() + assert len(list(result)) == 0 + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + 
name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + result = module_project.bulk_import_requests() + assert len(list(result)) == 3 + + +def test_delete(module_project, predictions): + name = str(uuid.uuid4()) + + bulk_import_requests = module_project.bulk_import_requests() + [ + bulk_import_request.delete() + for bulk_import_request in bulk_import_requests + ] + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 1 + + bulk_import_request.delete() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index 9e8963a26..a0df559fc 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,8 +1,24 @@ +from labelbox.schema.media_type import MediaType +from labelbox.schema.project import Project import pytest -from pytest_cases import fixture_ref, parametrize + +from labelbox import parser +from pytest_cases import parametrize, fixture_ref from labelbox.exceptions import MALValidationError -from labelbox.schema.media_type import MediaType +from labelbox.schema.bulk_import_request import ( + NDChecklist, + NDClassification, + NDMask, + NDPolygon, + NDPolyline, + NDRadio, + NDRectangle, + NDText, + NDTextEntity, + NDTool, + _validate_ndjson, +) """ - These NDlabels are apart of bulkImportReqeust and should be removed once bulk import request is removed @@ -175,6 +191,39 @@ def test_missing_feature_schema(module_project, rectangle_inference): _validate_ndjson([pred], module_project) +def test_validate_ndjson(tmp_path, configured_project): + file_name = f"broken.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + f.write("test") + + with pytest.raises(ValueError): + configured_project.upload_annotations( + name="name", annotations=str(file_path), validate=True + ) + + +def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): + file_name = f"repeat_uuid.ndjson" + file_path = tmp_path / file_name + repeat_uuid = predictions.copy() + repeat_uuid[0]["uuid"] = "test_uuid" + repeat_uuid[1]["uuid"] = "test_uuid" + + with file_path.open("w") as f: + parser.dump(repeat_uuid, f) + + with pytest.raises(MALValidationError): + configured_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + with pytest.raises(MALValidationError): + configured_project.upload_annotations( + name="name", validate=True, annotations=repeat_uuid + ) + + @pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) def test_video_upload(video_checklist_inference, configured_project): pred = video_checklist_inference[0].copy() diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json new file mode 100644 index 000000000..4de15e217 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json @@ -0,0 +1,54 
@@ +[ + { + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "confidence": 0.8, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" + }, + { + "answer": [ + { + "schemaId": "ckrb1sfl8099e0y919v260awv", + "confidence": 0.82, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } + ], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json new file mode 100644 index 000000000..83a95e5bf --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json @@ -0,0 +1,25 @@ +[{ + "location": { + "start": 67, + "end": 128 + }, + "messageId": "some-message-id", + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] +}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 75fe36e44..91563b8ae 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,17 +8,16 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - }, - "classifications": [] + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -29,17 +28,20 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - }, - "classifications": [] + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -48,39 +50,762 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + 
"x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + 
}, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 }, { - "x": 15.0, - "y": 20.0 + "x": 1119, + "y": 934 }, { - "x": 20.0, - "y": 25.0 + "x": 1118, + "y": 935 }, { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json new file mode 100644 index 000000000..591e40cf6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json @@ -0,0 +1,823 @@ +[ + { + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + "schemaId": "ckrazcueb16og0z6609jj7y3y", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + { + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + 
"schemaId": "ckrazcuec16ok0z66f956apb7", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.834, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } + }, + { + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + "schemaId": "ckrazcuec16oi0z66dzrd8pfl", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.986, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "polygon": [ + { + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, 
+ "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 + } + ] + }, + { + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + "schemaId": "ckrazcuec16om0z66bhhh4tp7", + "dataRow": { + "globalKey": 
"05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "point": { + "x": 2122, + "y": 1457 + } + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 466a03594..82be4cdab 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,86 +1,826 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "ckrazcueb16og0z6609jj7y3y", + "name": "box a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": [], - "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "confidence": 0.854, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.7 } - ], - "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - } + ] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "ckrazcuec16ok0z66f956apb7", + "name": "mask a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": [], - "confidence": 0.834, + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + }, + "confidence": 0.685, "customMetrics": [ { "name": "customMetric1", - "value": 0.3 + "value": 0.4 + }, + { + "name": "customMetric2", + "value": 0.9 } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - } + ] }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "ckrazcuec16oi0z66dzrd8pfl", + "name": "polygon a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.986, + "confidence": 0.71, "customMetrics": [ { "name": "customMetric1", - "value": 0.9 + "value": 0.1 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 
929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 }, { - "x": 15.0, - "y": 20.0 + "x": 1099, + "y": 911 }, { - "x": 20.0, - "y": 25.0 + "x": 1100, + "y": 911 }, { - "x": 10.0, - "y": 20.0 + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, 
+ "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "ckrazcuec16om0z66bhhh4tp7", + "name": "point a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, + "confidence": 0.77, + "customMetrics": [ + { + "name": "customMetric2", + "value": 1.2 + } + ], "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json new file mode 100644 index 000000000..31be5a4c7 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json @@ -0,0 +1,10 @@ +[ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "aggregation": "ARITHMETIC_MEAN", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "metricValue": 0.1 + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json new file mode 100644 index 000000000..f4b4894f6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json @@ -0,0 +1,155 @@ +[{ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 4, + "unit": "POINTS", + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 162.73, + "left": 32.45, + "height": 388.16999999999996, + "width": 101.66000000000001 + } +}, { + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "bbox": { + "top": 223.26, + "left": 251.42, + "height": 457.03999999999996, + "width": 186.78 + } +}, { + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": 
[], + "page": 6, + "unit": "POINTS", + "confidence": 0.99, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 32.52, + "left": 218.17, + "height": 231.73, + "width": 110.56000000000003 + } +}, { + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "confidence": 0.89, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 117.39, + "left": 4.25, + "height": 456.9200000000001, + "width": 164.83 + } +}, { + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 8, + "unit": "POINTS", + "bbox": { + "top": 82.13, + "left": 217.28, + "height": 279.76, + "width": 82.43000000000004 + } +}, { + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 3, + "unit": "POINTS", + "bbox": { + "top": 298.12, + "left": 83.34, + "height": 203.83000000000004, + "width": 0.37999999999999545 + } +}, +{ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "named_entity", + "classifications": [], + "textSelections": [ + { + "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + "tokenIds": [ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c" + ], + "page": 1 + } + ] +} +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json new file mode 100644 index 000000000..d6a9eecbd --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json @@ -0,0 +1,36 @@ +[ + { + "line": [ + { + "x": 2534.353, + "y": 249.471 + }, + { + "x": 2429.492, + "y": 182.092 + }, + { + "x": 2294.322, + "y": 221.962 + } + ], + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-line", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.58, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json new file mode 100644 index 000000000..1f26d8dc8 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json @@ -0,0 +1,26 @@ +[ + { + "location": { + "start": 67, + "end": 128 + }, + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + 
"schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json new file mode 100644 index 000000000..11e0753d9 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json @@ -0,0 +1,166 @@ +[{ + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" + }, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{ + "start": 30, + "end": 35 + }, { + "start": 50, + "end": 51 + }] +}, { + "answer": [{ + "schemaId": "ckrb1sfl8099e0y919v260awv" + }], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{ + "start": 0, + "end": 5 + }] +}, { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" +}, { + "classifications": [], + "schemaId": + "cl5islwg200gfci6g0oitaypu", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [{ + "keyframes": [{ + "frame": 1, + "line": [{ + "x": 10.0, + "y": 10.0 + }, { + "x": 100.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }, { + "frame": 5, + "line": [{ + "x": 15.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 8, + "line": [{ + "x": 100.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [{ + "keyframes": [{ + "frame": 1, + "point": { + "x": 10.0, + "y": 10.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 5, + "point": { + "x": 50.0, + "y": 50.0 + }, + "classifications": [] + }, { + "frame": 10, + "point": { + "x": 10.0, + "y": 50.0 + }, + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5iw0roz00lwci6g5jni62vs", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [{ + "keyframes": [{ + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0 + }, + "classifications": [] + }, { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0 + }, + "classifications": [] + }] + }] +}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 59f568c75..0bc3c8924 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ 
b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py
@@ -37,6 +37,13 @@ def test_serialization_min():
     res.pop("uuid")
     assert res == expected
 
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    for i, annotation in enumerate(res.annotations):
+        annotation.extra.pop("uuid")
+        assert annotation.value == label.annotations[i].value
+        assert annotation.name == label.annotations[i].name
+
 
 def test_serialization_with_classification():
     label = Label(
@@ -127,6 +134,12 @@ def test_serialization_with_classification():
     res.pop("uuid")
     assert res == expected
 
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    assert res.model_dump(exclude_none=True) == label.model_dump(
+        exclude_none=True
+    )
+
 
 def test_serialization_with_classification_double_nested():
     label = Label(
@@ -220,6 +233,13 @@ def test_serialization_with_classification_double_nested():
     res.pop("uuid")
     assert res == expected
 
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    res.annotations[0].extra.pop("uuid")
+    assert res.model_dump(exclude_none=True) == label.model_dump(
+        exclude_none=True
+    )
+
 
 def test_serialization_with_classification_double_nested_2():
     label = Label(
@@ -310,3 +330,9 @@ def test_serialization_with_classification_double_nested_2():
     res = next(serialized)
     res.pop("uuid")
     assert res == expected
+
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    assert res.model_dump(exclude_none=True) == label.model_dump(
+        exclude_none=True
+    )
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
index 82adce99c..8dcb17f0b 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
@@ -1,73 +1,15 @@
 import json
-from labelbox.data.annotation_types.classification.classification import (
-    Checklist,
-    Radio,
-    Text,
-)
-from labelbox.data.annotation_types.data.generic_data_row_data import (
-    GenericDataRowData,
-)
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
-from labelbox.types import (
-    Label,
-    ClassificationAnnotation,
-    ClassificationAnswer,
-)
-from labelbox.data.mixins import CustomMetric
-
 
 def test_classification():
     with open(
         "tests/data/assets/ndjson/classification_import.json", "r"
     ) as file:
         data = json.load(file)
-
-    label = Label(
-        data=GenericDataRowData(
-            uid="ckrb1sf1i1g7i0ybcdc6oc8ct",
-        ),
-        annotations=[
-            ClassificationAnnotation(
-                feature_schema_id="ckrb1sfjx099a0y914hl319ie",
-                extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"},
-                value=Radio(
-                    answer=ClassificationAnswer(
-                        custom_metrics=[
-                            CustomMetric(name="customMetric1", value=0.5),
-                            CustomMetric(name="customMetric2", value=0.3),
-                        ],
-                        confidence=0.8,
-                        feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb",
-                    ),
-                ),
-            ),
-            ClassificationAnnotation(
-                feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
-                extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"},
-                value=Checklist(
-                    answer=[
-                        ClassificationAnswer(
-                            custom_metrics=[
-                                CustomMetric(name="customMetric1", value=0.5),
-                                CustomMetric(name="customMetric2", value=0.3),
-                            ],
-                            confidence=0.82,
-                            feature_schema_id="ckrb1sfl8099e0y919v260awv",
-                        )
-                    ],
-                ),
-            ),
-            ClassificationAnnotation(
-                feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
-                extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"},
-                value=Text(answer="a value"),
-            ),
-        ],
-    )
-
-
res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -76,48 +18,6 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - label = Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - name="classification a", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="choice 1", - ), - ), - ), - ClassificationAnnotation( - name="classification b", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.945, - name="choice 2", - ) - ], - ), - ), - ClassificationAnnotation( - name="classification c", - extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, - value=Text(answer="a value"), - ), - ], - ) - - res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 561f9ce86..f7da9181b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,12 +1,8 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -103,62 +99,25 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - -def test_conversation_entity_import(): - with open( - "tests/data/assets/ndjson/conversation_entity_import.json", "r" - ) as file: - data = json.load(file) - - label = lb_types.Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - lb_types.ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, - value=lb_types.ConversationEntity( - start=67, end=128, message_id="some-message-id" - ), - ) - ], - ) - - res = list(NDJsonConverter.serialize([label])) - assert res == data + deserialized_label = list(NDJsonConverter().deserialize(ndjson)) + deserialized_label[0].annotations[0].extra.pop("uuid") + assert deserialized_label[0].model_dump(exclude_none=True) == label[ + 0 + ].model_dump(exclude_none=True) -def test_conversation_entity_import_without_confidence(): - with open( +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/conversation_entity_import.json", "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_conversation_entity_import(filename: str): + with 
open(filename, "r") as file:
         data = json.load(file)
 
-    label = lb_types.Label(
-        uid=None,
-        data=GenericDataRowData(
-            uid="cl6xnv9h61fv0085yhtoq06ht",
-        ),
-        annotations=[
-            lb_types.ObjectAnnotation(
-                name="some-text-entity",
-                feature_schema_id="cl6xnuwt95lqq07330tbb3mfd",
-                extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"},
-                value=lb_types.ConversationEntity(
-                    start=67, end=128, extra={}, message_id="some-message-id"
-                ),
-            )
-        ],
-    )
-
-    res = list(NDJsonConverter.serialize([label]))
+    res = list(NDJsonConverter.deserialize(data))
+    res = list(NDJsonConverter.serialize(res))
     assert res == data
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py
index 999e1bda5..333c00250 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py
@@ -1,29 +1,67 @@
+from copy import copy
+import pytest
 import labelbox.types as lb_types
 from labelbox.data.serialization import NDJsonConverter
+from labelbox.data.serialization.ndjson.objects import (
+    NDDicomSegments,
+    NDDicomSegment,
+    NDDicomLine,
+)
+
+"""
+Data gen prompt test data
+"""
+
+prompt_text_annotation = lb_types.PromptClassificationAnnotation(
+    feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
+    name="test",
+    value=lb_types.PromptText(
+        answer="the answer to the text questions right here"
+    ),
+)
+
+prompt_text_ndjson = {
+    "answer": "the answer to the text questions right here",
+    "name": "test",
+    "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
+    "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+}
+
+data_gen_label = lb_types.Label(
+    data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+    annotations=[prompt_text_annotation],
+)
+
+"""
+Prompt annotation test
+"""
 
 
 def test_serialize_label():
-    prompt_text_annotation = lb_types.PromptClassificationAnnotation(
-        feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
-        name="test",
-        extra={"uuid": "test"},
-        value=lb_types.PromptText(
-            answer="the answer to the text questions right here"
-        ),
-    )
+    serialized_label = next(NDJsonConverter().serialize([data_gen_label]))
+    # Remove the uuid field since it is a random value that cannot be specified and is also meant for relationships
+    del serialized_label["uuid"]
+    assert serialized_label == prompt_text_ndjson
+
-    prompt_text_ndjson = {
-        "answer": "the answer to the text questions right here",
-        "name": "test",
-        "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
-        "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
-        "uuid": "test",
-    }
-
-    data_gen_label = lb_types.Label(
-        data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
-        annotations=[prompt_text_annotation],
+def test_deserialize_label():
+    deserialized_label = next(
+        NDJsonConverter().deserialize([prompt_text_ndjson])
     )
-    serialized_label = next(NDJsonConverter().serialize([data_gen_label]))
+    if hasattr(deserialized_label.annotations[0], "extra"):
+        # Extra fields are added to the deserialized label by default and need to be removed to match
+        deserialized_label.annotations[0].extra = {}
+    assert deserialized_label.model_dump(
+        exclude_none=True
+    ) == data_gen_label.model_dump(exclude_none=True)
 
-    assert serialized_label == prompt_text_ndjson
+
+def test_serialize_deserialize_label():
+    serialized = list(NDJsonConverter.serialize([data_gen_label]))
+    deserialized = next(NDJsonConverter.deserialize(serialized))
+    if hasattr(deserialized.annotations[0], "extra"):
+        # Extra fields are added to the deserialized label by default and need to be removed to match
+
deserialized.annotations[0].extra = {} + assert deserialized.model_dump( + exclude_none=True + ) == data_gen_label.model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 762891aa2..633214367 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,5 +1,6 @@ from copy import copy import pytest +import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -180,3 +181,28 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson + + +@pytest.mark.parametrize("label, ndjson", labels_ndjsons) +def test_deserialize_label(label, ndjson): + deserialized_label = next(NDJsonConverter().deserialize([ndjson])) + if hasattr(deserialized_label.annotations[0], "extra"): + deserialized_label.annotations[0].extra = {} + for i, annotation in enumerate(deserialized_label.annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value + + +@pytest.mark.parametrize("label", labels) +def test_serialize_deserialize_label(label): + serialized = list(NDJsonConverter.serialize([label])) + deserialized = list(NDJsonConverter.deserialize(serialized)) + if hasattr(deserialized[0].annotations[0], "extra"): + deserialized[0].annotations[0].extra = {} + for i, annotation in enumerate(deserialized[0].annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index a0897ad9f..5fe6a9789 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,19 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - RectangleUnit, - Point, - DocumentRectangle, - DocumentEntity, - DocumentTextSelection, -) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -66,144 +53,10 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - labels = [ - Label( - uid=None, - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": 
"20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.89, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - ) - ] - ), - ) - ], - ), - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_with_name_only(): @@ -212,135 +65,26 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id=None, - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, 
y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.74, - name="boxy", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - ) - ] - ), - ) - ], - ), - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson + + +def test_pdf_bbox_deserialize(): + deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) + deserialized[0].annotations[0].extra = {} + assert ( + deserialized[0].annotations[0].value + == bbox_labels[0].annotations[0].value + ) + assert ( + deserialized[0].annotations[0].name + == bbox_labels[0].annotations[0].name + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 1ab678cde..4adcd9935 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,6 +9,8 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=VideoData( uid="cklr9mr4m5iao0rb6cvxu4qbn", + file_path=None, + frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -20,7 +22,6 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", - "uuid": "b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, 
value=Rectangle(
             extra={},
@@ -587,4 +588,31 @@ def test_serialize_video_objects():
     serialized_labels = NDJsonConverter.serialize([label])
     label = next(serialized_labels)
 
-    assert label == video_serialized_bbox_label()
+    manual_label = video_serialized_bbox_label()
+
+    for key in label.keys():
+        # ignore uuid because we randomize it if none was provided
+        if key != "uuid":
+            assert label[key] == manual_label[key]
+
+    assert len(label["segments"]) == 2
+    assert len(label["segments"][0]["keyframes"]) == 2
+    assert len(label["segments"][1]["keyframes"]) == 4
+
+    # converting back returns only the keyframes, so the annotation count should be the sum across all segments
+    deserialized_labels = NDJsonConverter.deserialize([label])
+    label = next(deserialized_labels)
+    assert len(label.annotations) == 6
+
+
+def test_confidence_is_ignored():
+    label = video_bbox_label()
+    serialized_labels = NDJsonConverter.serialize([label])
+    label = next(serialized_labels)
+    label["confidence"] = 0.453
+    label["segments"][0]["confidence"] = 0.453
+
+    deserialized_labels = NDJsonConverter.deserialize([label])
+    label = next(deserialized_labels)
+    for annotation in label.annotations:
+        assert annotation.confidence is None
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py
index 349be13a8..84c017497 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py
@@ -34,6 +34,16 @@ def test_serialization():
     assert res["answer"] == "text_answer"
     assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d"
 
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+
+    annotation = res.annotations[0]
+
+    annotation_value = annotation.value
+    assert type(annotation_value) is Text
+    assert annotation_value.answer == "text_answer"
+    assert annotation_value.confidence == 0.5
+
 
 def test_nested_serialization():
     label = Label(
@@ -92,3 +102,19 @@ def test_nested_serialization():
     assert sub_classification["name"] == "nested answer"
     assert sub_classification["answer"] == "nested answer"
     assert sub_classification["confidence"] == 0.7
+
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    annotation = res.annotations[0]
+    answer = annotation.value.answer[0]
+    assert answer.confidence == 0.9
+    assert answer.name == "first_answer"
+
+    classification_answer = answer.classifications[0].value.answer
+    assert classification_answer.confidence == 0.8
+    assert classification_answer.name == "first_sub_radio_answer"
+
+    sub_classification_answer = classification_answer.classifications[0].value
+    assert type(sub_classification_answer) is Text
+    assert sub_classification_answer.answer == "nested answer"
+    assert sub_classification_answer.confidence == 0.7
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py
index d104a691e..2b3fa7f8c 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py
@@ -1,74 +1,73 @@
-from labelbox.data.annotation_types.data.generic_data_row_data import (
-    GenericDataRowData,
-)
+import json
+import pytest
+
+from labelbox.data.serialization.ndjson.classification import NDRadio
+
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
-from labelbox.types import (
-    Label,
-    ClassificationAnnotation,
-    Radio,
-    ClassificationAnswer,
-)
+from labelbox.data.serialization.ndjson.objects import NDLine -def test_generic_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def round_dict(data): + if isinstance(data, dict): + for key in data: + if isinstance(data[key], float): + data[key] = int(data[key]) + elif isinstance(data[key], dict): + data[key] = round_dict(data[key]) + elif isinstance(data[key], (list, tuple)): + data[key] = [round_dict(r) for r in data[key]] - label = Label( - data=GenericDataRowData( - global_key="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) + return data + + +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/classification_import_global_key.json", + "tests/data/assets/ndjson/metric_import_global_key.json", + "tests/data/assets/ndjson/polyline_import_global_key.json", + "tests/data/assets/ndjson/text_entity_import_global_key.json", + "tests/data/assets/ndjson/conversation_entity_import_global_key.json", + ], +) +def test_many_types(filename: str): + with open(filename, "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == data + f.close() - res = list(NDJsonConverter.serialize([label])) - assert res == expected +def test_image(): + with open( + "tests/data/assets/ndjson/image_import_global_key.json", "r" + ) as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() -def test_dict_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def test_pdf(): + with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() - label = Label( - data={ - "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct", - }, - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) - res = list(NDJsonConverter.serialize([label])) +def test_video(): + with open( + "tests/data/assets/ndjson/video_import_global_key.json", "r" + ) as f: + data = json.load(f) - assert res == expected + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] + f.close() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index d67acb9c3..1729e1f46 100644 --- 
a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,8 +1,4 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -14,7 +10,6 @@ ImageData, MaskData, ) -from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -34,74 +29,12 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.9) - ], - confidence=0.986, - feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_image_with_name_only(): @@ -110,74 +43,11 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - name="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - name="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.9) - ], - 
confidence=0.986, - name="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - name="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask(): @@ -187,11 +57,10 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], - "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -199,54 +68,16 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": (255, 0, 0), + "colorRGB": [255, 0, 0], }, - "classifications": [], }, ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) - mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) - mask_numpy = mask_numpy.astype(np.uint8) - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.8, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Mask( - mask=MaskData(arr=mask_numpy), - color=(1, 1, 1), - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - extra={}, - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=(255, 0, 0), - ), - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask_from_arr(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 40e098405..45c5c67bf 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,166 +1,38 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.annotation_types.metrics.confusion_matrix import ( - ConfusionMatrixMetric, -) from 
labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ScalarMetric, - ScalarMetricAggregation, - ConfusionMatrixAggregation, -) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ScalarMetric( - value=0.1, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - assert res == data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert reserialized == data def test_custom_scalar_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: 0.1, 0.2: 0.5}, - "metricName": "custom_iou", - "aggregation": "SUM", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ScalarMetric( - value=0.1, - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value=0.1, - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value={"0.1": 0.1, "0.2": 0.5}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_scalar_import.json", "r" + ) as file: + data = json.load(file) - assert res == data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) def test_custom_confusion_matrix_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (1, 1, 2, 3), - "metricName": "50%_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (0, 1, 2, 5), - "metricName": "50%_iou", - "featureName": "sample_class", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - "metricName": "50%_iou", - "aggregation": "CONFUSION_MATRIX", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ConfusionMatrixMetric( - 
value=(1, 1, 2, 3), - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value=(0, 1, 2, 5), - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" + ) as file: + data = json.load(file) - assert data == res + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 202f793fe..69594ff73 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,125 +1,32 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest from labelbox.data.serialization import NDJsonConverter -from labelbox.types import ( - Label, - MessageEvaluationTaskAnnotation, - MessageSingleSelectionTask, - MessageMultiSelectionTask, - MessageInfo, - OrderedMessageInfo, - MessageRankingTask, -) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cnjencjencjfencvj", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="single-selection", - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, - value=MessageSingleSelectionTask( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - parent_message_id="clxfznjb800073b6v43ppx9ca", - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cfcerfvergerfefj", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="multi-selection", - extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, - value=MessageMultiSelectionTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - selected_messages=[ - MessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - ) - ], - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="ranking", - extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=2, - ), - ], - ), - ) - ], - ), - ] + deserialized = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(deserialized)) - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert data == reserialized def 
test_mesage_ranking_task_wrong_order_serialization(): + with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: + data = json.load(file) + + some_ranking_task = next( + task + for task in data + if task["messageEvaluationTask"]["format"] == "message-ranking" + ) + some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ + "order" + ] = 3 + with pytest.raises(ValueError): - ( - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="ranking", - extra={ - "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" - }, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - ], - ), - ) - ], - ), - ) + list(NDJsonConverter.deserialize([some_ranking_task])) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py new file mode 100644 index 000000000..790bd87b3 --- /dev/null +++ b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py @@ -0,0 +1,19 @@ +import json +from labelbox.data.serialization.ndjson.label import NDLabel +from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle +import pytest + + +def test_bad_annotation_input(): + data = [{"test": 3}] + with pytest.raises(ValueError): + NDLabel(**{"annotations": data}) + + +def test_correct_annotation_input(): + with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: + data = json.load(f) + assert isinstance( + NDLabel(**{"annotations": [data[0]]}).annotations[0], + NDDocumentRectangle, + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index 3633c9cbe..e0f0df0e6 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,135 +1,13 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Rectangle, - Point, - ClassificationAnnotation, - Radio, - ClassificationAnswer, - Text, - Checklist, -) def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.34, - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": 
"d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "5d03213e-4408-456c-9eca-cf0723202961", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.894, - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={}, - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -138,112 +16,6 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="box a", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.811, - name="first answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box b", - extra={ - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification b", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.815, - name="second answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification c", - value=Checklist( - answer=[ - ClassificationAnswer( - name="third answer", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="a string", - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + 
res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index cd11d97fe..97d48a14e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,76 +1,18 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ObjectAnnotation, Point, Line, Label - - -def test_polyline_import_with_confidence(): - with open( - "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" - ) as file: - data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-line", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_polyline_import_without_confidence(): - with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/polyline_without_confidence_import.json", + "tests/data/assets/ndjson/polyline_import.json", + ], +) +def test_polyline_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.58, - name="some-line", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 4458e335c..bd80f9267 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,3 +1,4 @@ +import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -39,6 +40,14 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + + for i, annotation in enumerate(res.annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_serialization_with_radio_classification(): label = Label( @@ -92,3 +101,10 @@ def test_serialization_with_radio_classification(): res = next(serialized) 
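    # serialize() yields one NDJSON dict per annotation; the uuid field is
    # auto-generated here, so it is dropped before comparing to the expected payload.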
res.pop("uuid") assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.annotations[0].model_dump( + exclude_none=True + ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 0e42ab152..66630dbb5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,10 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -12,26 +8,8 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="bbox", - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - }, - value=Rectangle( - start=Point(x=38.0, y=28.0), - end=Point(x=81.0, y=69.0), - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -61,6 +39,8 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, }, ) @@ -68,9 +48,8 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - data = list(NDJsonConverter.serialize([label])) - - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def test_rectangle_mixed_start_end_points(): @@ -97,13 +76,17 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, + }, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - data = list(NDJsonConverter.serialize([label])) - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index 235b66957..f33719035 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,135 +1,16 @@ import json +from uuid import uuid4 -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Point, - Rectangle, - RelationshipAnnotation, - Relationship, -) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = [ - Label( - data=GenericDataRowData( - uid="clf98gj90000qp38ka34yhptl", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": 
"d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - extra={}, - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="clf98gj90000qp38ka34yhptl-DIFFERENT", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - ] + res = list(NDJsonConverter.deserialize(data)) res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -163,3 +44,29 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] + + +def test_relationship_nonexistent_object(): + with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: + data = json.load(file) + + relationship_annotation = data[2] + source_uuid = relationship_annotation["relationship"]["source"] + target_uuid = str(uuid4()) + relationship_annotation["relationship"]["target"] = target_uuid + error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" + + with pytest.raises(ValueError, match=error_msg): + list(NDJsonConverter.deserialize(data)) + + +def test_relationship_duplicate_uuids(): + with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: + data = json.load(file) + + source, target = data[0], data[1] + target["uuid"] = source["uuid"] + error_msg = f"UUID '{source['uuid']}' is not unique" + + with pytest.raises(AssertionError, match=error_msg): + list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 21db389cb..d5e81c51a 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,5 +1,7 @@ from 
labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( + ClassificationAnswer, + Radio, Text, ) from labelbox.data.annotation_types.data.text import TextData @@ -32,3 +34,11 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index fb93f15d4..3e856f001 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,68 +1,21 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, TextEntity - - -def test_text_entity_import(): - with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: - data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_text_entity_import_without_confidence(): - with open( +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/text_entity_import.json", "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_text_entity_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 4fba5c2ca..c7a6535c4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,10 +1,10 @@ import json +from labelbox.client import Client from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, Radio, - Text, ) from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.geometry.line import Line @@ -13,10 +13,8 @@ 
from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.video import ( - VideoClassificationAnnotation, - VideoObjectAnnotation, -) +from labelbox.data.annotation_types.video import VideoObjectAnnotation +from labelbox import parser from labelbox.data.serialization.ndjson.converter import NDJsonConverter from operator import itemgetter @@ -26,275 +24,15 @@ def test_video(): with open("tests/data/assets/ndjson/video_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), - annotations=[ - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=30, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=31, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=32, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=33, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=34, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=35, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=50, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=51, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=0, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=1, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - 
), - frame=2, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=3, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=4, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=5, - ), - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c"}, - value=Text(answer="a value"), - ), - VideoObjectAnnotation( - feature_schema_id="cl5islwg200gfci6g0oitaypu", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=10.0, y=10.0), - Point(x=100.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5islwg200gfci6g0oitaypu", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=15.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5islwg200gfci6g0oitaypu", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=100.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=8, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=10.0), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=50.0, y=50.0), - frame=5, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=50.0), - frame=10, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - feature_schema_id="cl5iw0roz00lwci6g5jni62vs", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=10.0), - end=Point(x=155.0, y=110.0), - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5iw0roz00lwci6g5jni62vs", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=30.0), - end=Point(x=155.0, y=80.0), - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5iw0roz00lwci6g5jni62vs", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=200.0, y=300.0), - end=Point(x=350.0, y=700.0), - ), - frame=10, - keyframe=True, - segment_index=1, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) data = sorted(data, key=itemgetter("uuid")) 
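    # res is sorted by the same uuid key so that the zip() below pairs each
    # original NDJSON dict with its round-tripped counterpart before comparing.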
res = sorted(res, key=itemgetter("uuid")) - assert data == res + + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_name_only(): @@ -302,274 +40,16 @@ def test_video_name_only(): "tests/data/assets/ndjson/video_import_name_only.json", "r" ) as file: data = json.load(file) - labels = [ - Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), - annotations=[ - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=30, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=31, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=32, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=33, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=34, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=35, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=50, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=51, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=0, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=1, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=2, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=3, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=4, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=5, - ), - ClassificationAnnotation( - name="question 3", - extra={"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3"}, - value=Text(answer="a value"), - ), - VideoObjectAnnotation( - name="segment 1", - extra={"uuid": 
"6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=10.0, y=10.0), - Point(x=100.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 1", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=15.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 1", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=100.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=8, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - name="segment 2", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=10.0), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 2", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=50.0, y=50.0), - frame=5, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - name="segment 2", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=50.0), - frame=10, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - name="segment 3", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=10.0), - end=Point(x=155.0, y=110.0), - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 3", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=30.0), - end=Point(x=155.0, y=80.0), - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 3", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=200.0, y=300.0), - end=Point(x=350.0, y=700.0), - ), - frame=10, - keyframe=True, - segment_index=1, - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - assert data == res + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_classification_global_subclassifications(): @@ -587,6 +67,7 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( + name="checklist", answer=[ ClassificationAnswer( name="first_checklist_answer", @@ -613,7 +94,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = { + expected_second_annotation = nested_checklist_annotation_ndjson = { "name": "nested_checklist_question", "answer": [ { @@ -635,6 +116,12 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + assert annotation.name == label.annotations[i].name + def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -800,6 +287,14 @@ def test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected + deserialized = 
NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_video_classification_point(): bbox_annotation = [ @@ -950,6 +445,13 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + def test_video_classification_frameline(): bbox_annotation = [ @@ -1117,289 +619,9 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - -[ - { - "answer": "a value", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 5, "start": 0}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5islwg200gfci6g0oitaypu", - "segments": [ - { - "keyframes": [ - { - "classifications": [], - "frame": 1, - "line": [ - {"x": 10.0, "y": 10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - { - "classifications": [], - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - } - ] - }, - ], - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "segments": [ - { - "keyframes": [ - { - "classifications": [], - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - } - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - }, - { - "classifications": [], - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - }, - ] - }, - ], - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "segments": [ - { - "keyframes": [ - { - "bbox": { - "height": 100.0, - "left": 5.0, - "top": 10.0, - "width": 150.0, - }, - "classifications": [], - "frame": 1, - }, - { - "bbox": { - "height": 50.0, - "left": 5.0, - "top": 30.0, - "width": 150.0, - }, - "classifications": [], - "frame": 5, - }, - ] - }, - { - "keyframes": [ - { - "bbox": { - "height": 400.0, - "left": 200.0, - "top": 300.0, - "width": 150.0, - }, - "classifications": [], - "frame": 10, - } - ] - }, - ], - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - }, -] - -[ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - 
"dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], - }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{"start": 0, "end": 5}], - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "classifications": [], - "schemaId": "cl5islwg200gfci6g0oitaypu", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "line": [ - {"x": 10.0, "y": 10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - { - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - } - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - "classifications": [], - } - ] - }, - { - "keyframes": [ - { - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - "classifications": [], - }, - { - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - "classifications": [], - }, - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0, - }, - "classifications": [], - }, - { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0, - }, - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0, - }, - "classifications": [], - } - ] - }, - ], - }, -] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value From c0892155c54d9c860fdc1cde9ecac36a7282b45c Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:58:36 -0500 Subject: [PATCH 06/44] Removed data types besides generic data row data --- .../data/annotation_types/__init__.py | 14 +- .../data/annotation_types/data/__init__.py | 12 +- .../data/annotation_types/data/audio.py | 7 - .../annotation_types/data/conversation.py | 7 - .../data/annotation_types/data/dicom.py | 7 - .../data/annotation_types/data/document.py | 7 - .../data/annotation_types/data/html.py | 7 - .../data/llm_prompt_creation.py | 7 - .../data/llm_prompt_response_creation.py | 9 - .../data/llm_response_creation.py | 7 - .../data/annotation_types/data/raster.py | 5 +- .../data/annotation_types/data/text.py | 115 ------- .../data/annotation_types/data/tiled_image.py | 294 
------------------ .../data/annotation_types/data/video.py | 173 ----------- .../labelbox/data/annotation_types/label.py | 42 +-- .../serialization/ndjson/classification.py | 12 +- .../data/serialization/ndjson/label.py | 40 +-- .../data/serialization/ndjson/metric.py | 8 +- .../labelbox/data/serialization/ndjson/mmc.py | 3 +- .../data/serialization/ndjson/objects.py | 29 +- .../data/serialization/ndjson/relationship.py | 4 +- libs/labelbox/src/labelbox/utils.py | 4 +- .../data/annotation_types/test_collection.py | 16 +- .../serialization/ndjson/test_checklist.py | 14 +- .../data/serialization/ndjson/test_image.py | 3 +- .../data/serialization/ndjson/test_radio.py | 8 +- .../data/serialization/ndjson/test_text.py | 5 +- 27 files changed, 57 insertions(+), 802 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/audio.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/document.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/html.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/text.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/video.py diff --git a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py index 7908bc242..84d6d65a5 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py @@ -32,18 +32,8 @@ from .classification import Radio from .classification import Text -from .data import AudioData -from .data import ConversationData -from .data import DicomData -from .data import DocumentData -from .data import HTMLData -from .data import ImageData +from .data import GenericDataRowData from .data import MaskData -from .data import TextData -from .data import VideoData -from .data import LlmPromptResponseCreationData -from .data import LlmPromptCreationData -from .data import LlmResponseCreationData from .label import Label from .collection import LabelGenerator @@ -58,8 +48,6 @@ from .data.tiled_image import EPSG from .data.tiled_image import EPSGTransformer from .data.tiled_image import TiledBounds -from .data.tiled_image import TiledImageData -from .data.tiled_image import TileLayer from .llm_prompt_response.prompt import PromptText from .llm_prompt_response.prompt import PromptClassificationAnnotation diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py index 2522b2741..8d5e7289b 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py @@ -1,12 +1,2 @@ -from .audio import AudioData -from .conversation import ConversationData -from .dicom import DicomData -from .document import DocumentData -from .html import HTMLData -from .raster import ImageData from .raster import MaskData -from .text import TextData -from .video import VideoData -from 
.llm_prompt_response_creation import LlmPromptResponseCreationData -from .llm_prompt_creation import LlmPromptCreationData -from .llm_response_creation import LlmResponseCreationData +from .generic_data_row_data import GenericDataRowData diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py b/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py deleted file mode 100644 index 916fca99d..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class AudioData(BaseData, _NoCoercionMixin): - class_name: Literal["AudioData"] = "AudioData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py deleted file mode 100644 index ef6507dca..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class ConversationData(BaseData, _NoCoercionMixin): - class_name: Literal["ConversationData"] = "ConversationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py b/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py deleted file mode 100644 index ae4c377dc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DicomData(BaseData, _NoCoercionMixin): - class_name: Literal["DicomData"] = "DicomData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py b/libs/labelbox/src/labelbox/data/annotation_types/data/document.py deleted file mode 100644 index 810a3ed3e..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DocumentData(BaseData, _NoCoercionMixin): - class_name: Literal["DocumentData"] = "DocumentData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py b/libs/labelbox/src/labelbox/data/annotation_types/data/html.py deleted file mode 100644 index 7a78fcb7b..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class HTMLData(BaseData, _NoCoercionMixin): - class_name: Literal["HTMLData"] = "HTMLData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py deleted file mode 100644 index a1b0450bc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptCreationData"] = "LlmPromptCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py 
b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py deleted file mode 100644 index a8dfce894..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py +++ /dev/null @@ -1,9 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptResponseCreationData"] = ( - "LlmPromptResponseCreationData" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py deleted file mode 100644 index a8963ed3f..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmResponseCreationData"] = "LlmResponseCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py index ba4c6485f..0dd23e388 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py @@ -11,7 +11,7 @@ from pydantic import BaseModel, model_validator, ConfigDict from labelbox.exceptions import InternalServerError -from .base_data import BaseData + from ..types import TypedArray @@ -220,6 +220,3 @@ class MaskData(RasterData): url: Optional[str] = None arr: Optional[TypedArray[Literal['uint8']]] = None """ - - -class ImageData(RasterData, BaseData): ... diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py b/libs/labelbox/src/labelbox/data/annotation_types/data/text.py deleted file mode 100644 index fe4c222d3..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py +++ /dev/null @@ -1,115 +0,0 @@ -from typing import Callable, Optional - -import requests -from requests.exceptions import ConnectTimeout -from google.api_core import retry - -from pydantic import ConfigDict, model_validator -from labelbox.exceptions import InternalServerError -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class TextData(BaseData, _NoCoercionMixin): - """ - Represents text data. Requires arg file_path, text, or url - - >>> TextData(text="") - - Args: - file_path (str) - text (str) - url (str) - """ - - class_name: Literal["TextData"] = "TextData" - file_path: Optional[str] = None - text: Optional[str] = None - url: Optional[str] = None - model_config = ConfigDict(extra="forbid") - - @property - def value(self) -> str: - """ - Property that unifies the data access pattern for all references to the text. 
- - Returns: - string representation of the text - """ - if self.text: - return self.text - elif self.file_path: - with open(self.file_path, "r") as file: - text = file.read() - self.text = text - return text - elif self.url: - text = self.fetch_remote() - self.text = text - return text - else: - raise ValueError("Must set either url, file_path or im_bytes") - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry( - deadline=15.0, - predicate=retry.if_exception_type(ConnectTimeout, InternalServerError), - ) - def fetch_remote(self) -> str: - """ - Method for accessing url. - - If url is not publicly accessible or requires another access pattern - simply override this function - """ - response = requests.get(self.url) - if response.status_code in [500, 502, 503, 504]: - raise InternalServerError(response.text) - response.raise_for_status() - return response.text - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other text references. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - url for the text - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.text is not None: - self.url = signer(self.text.encode()) - else: - raise ValueError( - "One of url, im_bytes, file_path, numpy must not be None." - ) - return self.url - - @model_validator(mode="after") - def validate_date(self, values): - file_path = self.file_path - text = self.text - url = self.url - uid = self.uid - global_key = self.global_key - if uid == file_path == text == url == global_key == None: - raise ValueError( - "One of `file_path`, `text`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"TextData(file_path={self.file_path}," - f"text={self.text[:30] + '...' if self.text is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py index adb8db549..cdb7f4127 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py @@ -88,300 +88,6 @@ def validate_bounds_lat_lng(self): return self -class TileLayer(BaseModel): - """Url that contains the tile layer. 
Must be in the format: - - https://c.tile.openstreetmap.org/{z}/{x}/{y}.png - - >>> layer = TileLayer( - url="https://c.tile.openstreetmap.org/{z}/{x}/{y}.png", - name="slippy map tile" - ) - """ - - url: str - name: Optional[str] = "default" - - def asdict(self) -> Dict[str, str]: - return {"tileLayerUrl": self.url, "name": self.name} - - @field_validator("url") - def validate_url(cls, url): - xyz_format = "/{z}/{x}/{y}" - if xyz_format not in url: - raise ValueError(f"{url} needs to contain {xyz_format}") - return url - - -class TiledImageData(BaseData): - """Represents tiled imagery - - If specified version is 2, converts bounds from [lng,lat] to [lat,lng] - - Requires the following args: - tile_layer: TileLayer - tile_bounds: TiledBounds - zoom_levels: List[int] - Optional args: - max_native_zoom: int = None - tile_size: Optional[int] - version: int = 2 - alternative_layers: List[TileLayer] - - >>> tiled_image_data = TiledImageData(tile_layer=TileLayer, - tile_bounds=TiledBounds, - zoom_levels=[1, 12]) - """ - - tile_layer: TileLayer - tile_bounds: TiledBounds - alternative_layers: List[TileLayer] = [] - zoom_levels: Tuple[int, int] - max_native_zoom: Optional[int] = None - tile_size: Optional[int] = DEFAULT_TMS_TILE_SIZE - version: Optional[int] = 2 - multithread: bool = True - - def __post_init__(self) -> None: - if self.max_native_zoom is None: - self.max_native_zoom = self.zoom_levels[0] - - def asdict(self) -> Dict[str, str]: - return { - "tileLayerUrl": self.tile_layer.url, - "bounds": [ - [self.tile_bounds.bounds[0].x, self.tile_bounds.bounds[0].y], - [self.tile_bounds.bounds[1].x, self.tile_bounds.bounds[1].y], - ], - "minZoom": self.zoom_levels[0], - "maxZoom": self.zoom_levels[1], - "maxNativeZoom": self.max_native_zoom, - "epsg": self.tile_bounds.epsg.name, - "tileSize": self.tile_size, - "alternativeLayers": [ - layer.asdict() for layer in self.alternative_layers - ], - "version": self.version, - } - - def raster_data( - self, zoom: int = 0, max_tiles: int = 32, multithread=True - ) -> RasterData: - """Converts the tiled image asset into a RasterData object containing an - np.ndarray. - - Uses the minimum zoom provided to render the image. 
- """ - if self.tile_bounds.epsg == EPSG.SIMPLEPIXEL: - xstart, ystart, xend, yend = self._get_simple_image_params(zoom) - elif self.tile_bounds.epsg == EPSG.EPSG4326: - xstart, ystart, xend, yend = self._get_3857_image_params( - zoom, self.tile_bounds - ) - elif self.tile_bounds.epsg == EPSG.EPSG3857: - # transform to 4326 - transformer = EPSGTransformer.create_geo_to_geo_transformer( - EPSG.EPSG3857, EPSG.EPSG4326 - ) - transforming_bounds = [ - transformer(self.tile_bounds.bounds[0]), - transformer(self.tile_bounds.bounds[1]), - ] - xstart, ystart, xend, yend = self._get_3857_image_params( - zoom, transforming_bounds - ) - else: - raise ValueError(f"Unsupported epsg found: {self.tile_bounds.epsg}") - - self._validate_num_tiles(xstart, ystart, xend, yend, max_tiles) - - rounded_tiles, pixel_offsets = list( - zip( - *[ - self._tile_to_pixel(pt) - for pt in [xstart, ystart, xend, yend] - ] - ) - ) - - image = self._fetch_image_for_bounds(*rounded_tiles, zoom, multithread) - arr = self._crop_to_bounds(image, *pixel_offsets) - return RasterData(arr=arr) - - @property - def value(self) -> np.ndarray: - """Returns the value of a generated RasterData object.""" - return self.raster_data( - self.zoom_levels[0], multithread=self.multithread - ).value - - def _get_simple_image_params( - self, zoom - ) -> Tuple[float, float, float, float]: - """Computes the x and y tile bounds for fetching an image that - captures the entire labeling region (TiledData.bounds) given a specific zoom - - Simple has different order of x / y than lat / lng because of how leaflet behaves - leaflet reports all points as pixel locations at a zoom of 0 - """ - xend, xstart, yend, ystart = ( - self.tile_bounds.bounds[1].x, - self.tile_bounds.bounds[0].x, - self.tile_bounds.bounds[1].y, - self.tile_bounds.bounds[0].y, - ) - return ( - *[ - x * (2 ** (zoom)) / self.tile_size - for x in [xstart, ystart, xend, yend] - ], - ) - - def _get_3857_image_params( - self, zoom: int, bounds: TiledBounds - ) -> Tuple[float, float, float, float]: - """Computes the x and y tile bounds for fetching an image that - captures the entire labeling region (TiledData.bounds) given a specific zoom - """ - lat_start, lat_end = bounds.bounds[1].y, bounds.bounds[0].y - lng_start, lng_end = bounds.bounds[1].x, bounds.bounds[0].x - - # Convert to zoom 0 tile coordinates - xstart, ystart = self._latlng_to_tile(lat_start, lng_start) - xend, yend = self._latlng_to_tile(lat_end, lng_end) - - # Make sure that the tiles are increasing in order - xstart, xend = min(xstart, xend), max(xstart, xend) - ystart, yend = min(ystart, yend), max(ystart, yend) - return (*[pt * 2.0**zoom for pt in [xstart, ystart, xend, yend]],) - - def _latlng_to_tile( - self, lat: float, lng: float, zoom=0 - ) -> Tuple[float, float]: - """Converts lat/lng to 3857 tile coordinates - Formula found here: - https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#lon.2Flat_to_tile_numbers_2 - """ - scale = 2**zoom - lat_rad = math.radians(lat) - x = (lng + 180.0) / 360.0 * scale - y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale - return x, y - - def _tile_to_pixel(self, tile: float) -> Tuple[int, int]: - """Rounds a tile coordinate and reports the remainder in pixels""" - rounded_tile = int(tile) - remainder = tile - rounded_tile - pixel_offset = int(self.tile_size * remainder) - return rounded_tile, pixel_offset - - def _fetch_image_for_bounds( - self, - x_tile_start: int, - y_tile_start: int, - x_tile_end: int, - y_tile_end: int, - zoom: int, - multithread=True, - ) -> 
np.ndarray: - """Fetches the tiles and combines them into a single image. - - If a tile cannot be fetched, a padding of expected tile size is instead added. - """ - - if multithread: - tiles = {} - with ThreadPoolExecutor( - max_workers=TILE_DOWNLOAD_CONCURRENCY - ) as exc: - for x in range(x_tile_start, x_tile_end + 1): - for y in range(y_tile_start, y_tile_end + 1): - tiles[(x, y)] = exc.submit(self._fetch_tile, x, y, zoom) - - rows = [] - for y in range(y_tile_start, y_tile_end + 1): - row = [] - for x in range(x_tile_start, x_tile_end + 1): - try: - if multithread: - row.append(tiles[(x, y)].result()) - else: - row.append(self._fetch_tile(x, y, zoom)) - except: - row.append( - np.zeros( - shape=(self.tile_size, self.tile_size, 3), - dtype=np.uint8, - ) - ) - rows.append(np.hstack(row)) - - return np.vstack(rows) - - @retry.Retry(initial=1, maximum=16, multiplier=2) - def _fetch_tile(self, x: int, y: int, z: int) -> np.ndarray: - """ - Fetches the image and returns an np array. - """ - data = requests.get(self.tile_layer.url.format(x=x, y=y, z=z)) - data.raise_for_status() - decoded = np.array(Image.open(BytesIO(data.content)))[..., :3] - if decoded.shape[:2] != (self.tile_size, self.tile_size): - logger.warning(f"Unexpected tile size {decoded.shape}.") - return decoded - - def _crop_to_bounds( - self, - image: np.ndarray, - x_px_start: int, - y_px_start: int, - x_px_end: int, - y_px_end: int, - ) -> np.ndarray: - """This function slices off the excess pixels that are outside of the bounds. - This occurs because only full tiles can be downloaded at a time. - """ - - def invert_point(pt): - # Must have at least 1 pixel for stability. - pt = max(pt, 1) - # All pixel points are relative to a single tile - # So subtracting the tile size inverts the axis - pt = pt - self.tile_size - return pt if pt != 0 else None - - x_px_end, y_px_end = invert_point(x_px_end), invert_point(y_px_end) - return image[y_px_start:y_px_end, x_px_start:x_px_end, :] - - def _validate_num_tiles( - self, - xstart: float, - ystart: float, - xend: float, - yend: float, - max_tiles: int, - ): - """Calculates the number of expected tiles we would fetch. - - If this is greater than the number of max tiles, raise an error. - """ - total_n_tiles = (yend - ystart + 1) * (xend - xstart + 1) - if total_n_tiles > max_tiles: - raise ValueError( - f"Requested zoom results in {total_n_tiles} tiles." - f"Max allowed tiles are {max_tiles}" - f"Increase max tiles or reduce zoom level." - ) - - @field_validator("zoom_levels") - def validate_zoom_levels(cls, zoom_levels): - if zoom_levels[0] > zoom_levels[1]: - raise ValueError( - f"Order of zoom levels should be min, max. Received {zoom_levels}" - ) - return zoom_levels - - class EPSGTransformer(BaseModel): """Transformer class between different EPSG's. Useful when wanting to project in different formats. 
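
For reference, the lat/lng math the deleted TiledImageData relied on is the standard OSM slippy-map conversion cited in its docstring (https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames). A minimal standalone sketch of that conversion — hypothetical helper name, independent of the SDK:

    import math
    from typing import Tuple

    def latlng_to_tile(lat: float, lng: float, zoom: int = 0) -> Tuple[float, float]:
        # Project EPSG:4326 lat/lng into fractional Web Mercator tile
        # coordinates at the given zoom (x grows east, y grows south).
        scale = 2**zoom
        lat_rad = math.radians(lat)
        x = (lng + 180.0) / 360.0 * scale
        y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale
        return x, y

    # At zoom 0 the whole world is one tile, so both coordinates fall in [0, 1):
    # latlng_to_tile(37.7749, -122.4194) ~= (0.160, 0.386). Multiplying by
    # 2**zoom, as the deleted raster_data path did, rescales to deeper zooms.
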
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py b/libs/labelbox/src/labelbox/data/annotation_types/data/video.py deleted file mode 100644 index 581801036..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py +++ /dev/null @@ -1,173 +0,0 @@ -import logging -import os -import urllib.request -from typing import Callable, Dict, Generator, Optional, Tuple -from typing_extensions import Literal -from uuid import uuid4 - -import cv2 -import numpy as np -from google.api_core import retry - -from .base_data import BaseData -from ..types import TypedArray - -from pydantic import ConfigDict, model_validator - -logger = logging.getLogger(__name__) - - -class VideoData(BaseData): - """ - Represents video - """ - - file_path: Optional[str] = None - url: Optional[str] = None - frames: Optional[Dict[int, TypedArray[Literal["uint8"]]]] = None - # Required for discriminating between data types - model_config = ConfigDict(extra="forbid") - - def load_frames(self, overwrite: bool = False) -> None: - """ - Loads all frames into memory at once in order to access in non-sequential order. - This will use a lot of memory, especially for longer videos - - Args: - overwrite: Replace existing frames - """ - if self.frames and not overwrite: - return - - for count, frame in self.frame_generator(): - if self.frames is None: - self.frames = {} - self.frames[count] = frame - - @property - def value(self): - return self.frame_generator() - - def frame_generator( - self, cache_frames=False, download_dir="/tmp" - ) -> Generator[Tuple[int, np.ndarray], None, None]: - """ - A generator for accessing individual frames in a video. - - Args: - cache_frames (bool): Whether or not to cache frames while iterating through the video. - download_dir (str): Directory to save the video to. Defaults to `/tmp` dir - """ - if self.frames is not None: - for idx, frame in self.frames.items(): - yield idx, frame - return - elif self.url and not self.file_path: - file_path = os.path.join(download_dir, f"{uuid4()}.mp4") - logger.info("Downloading the video locally to %s", file_path) - self.fetch_remote(file_path) - self.file_path = file_path - - vidcap = cv2.VideoCapture(self.file_path) - - success, frame = vidcap.read() - count = 0 - if cache_frames: - self.frames = {} - while success: - frame = frame[:, :, ::-1] - yield count, frame - if cache_frames: - self.frames[count] = frame - success, frame = vidcap.read() - count += 1 - - def __getitem__(self, idx: int) -> np.ndarray: - if self.frames is None: - raise ValueError( - "Cannot select by index without iterating over the entire video or loading all frames." - ) - return self.frames[idx] - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry(deadline=15.0) - def fetch_remote(self, local_path) -> None: - """ - Method for downloading data from self.url - - If url is not publicly accessible or requires another access pattern - simply override this function - - Args: - local_path: Where to save the thing too. - """ - urllib.request.urlretrieve(self.url, local_path) - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other video references. - - Args: - signer: A function that accepts bytes and returns a signed url. 
- Returns: - url for the video - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.frames is not None: - self.file_path = self.frames_to_video(self.frames) - self.url = self.create_url(signer) - else: - raise ValueError("One of url, file_path, frames must not be None.") - return self.url - - def frames_to_video( - self, frames: Dict[int, np.ndarray], fps=20, save_dir="/tmp" - ) -> str: - """ - Compresses the data by converting a set of individual frames to a single video. - - """ - file_path = os.path.join(save_dir, f"{uuid4()}.mp4") - out = None - for key in frames.keys(): - frame = frames[key] - if out is None: - out = cv2.VideoWriter( - file_path, - cv2.VideoWriter_fourcc(*"MP4V"), - fps, - frame.shape[:2], - ) - out.write(frame) - if out is None: - return - out.release() - return file_path - - @model_validator(mode="after") - def validate_data(self): - file_path = self.file_path - url = self.url - frames = self.frames - uid = self.uid - global_key = self.global_key - - if uid == file_path == frames == url == global_key == None: - raise ValueError( - "One of `file_path`, `frames`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"VideoData(file_path={self.file_path}," - f"frames={'...' if self.frames is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 7eef43f31..9d5b92bdd 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -6,7 +6,6 @@ from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) -from labelbox.data.annotation_types.data.tiled_image import TiledImageData from labelbox.schema import ontology from ...annotated_types import Cuid @@ -14,19 +13,6 @@ from .relationship import RelationshipAnnotation from .llm_prompt_response.prompt import PromptClassificationAnnotation from .classification import ClassificationAnswer -from .data import ( - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - ImageData, - TextData, - VideoData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, -) from .geometry import Mask from .metrics import ScalarMetric, ConfusionMatrixMetric from .video import VideoClassificationAnnotation @@ -35,22 +21,6 @@ from ..ontology import get_feature_schema_lookup from pydantic import BaseModel, field_validator, model_serializer -DataType = Union[ - VideoData, - ImageData, - TextData, - TiledImageData, - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, - GenericDataRowData, -] - class Label(BaseModel): """Container for holding data and annotations @@ -67,14 +37,13 @@ class Label(BaseModel): Args: uid: Optional Label Id in Labelbox - data: Data of Label, Image, Video, Text or dict with a single key uid | global_key | external_id. - Note use of classes as data is deprecated. Use GenericDataRowData or dict with a single key instead. + data: GenericDataRowData or dict with a single key uid | global_key | external_id. 
annotations: List of Annotations in the label extra: additional context """ uid: Optional[Cuid] = None - data: DataType + data: GenericDataRowData annotations: List[ Union[ ClassificationAnnotation, @@ -94,13 +63,6 @@ class Label(BaseModel): def validate_data(cls, data): if isinstance(data, Dict): return GenericDataRowData(**data) - elif isinstance(data, GenericDataRowData): - return data - else: - warnings.warn( - f"Using {type(data).__name__} class for label.data is deprecated. " - "Use a dict or an instance of GenericDataRowData instead." - ) return data def object_annotations(self) -> List[ObjectAnnotation]: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py index 2c3215265..86cf0d094 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, Union, Optional -from labelbox.data.annotation_types import ImageData, TextData, VideoData +from labelbox.data.annotation_types import GenericDataRowData from labelbox.data.mixins import ( ConfidenceMixin, CustomMetric, @@ -232,7 +232,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[TextData, ImageData], + data: Union[GenericDataRowData], message_id: str, confidence: Optional[float] = None, ) -> "NDText": @@ -264,7 +264,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: Union[GenericDataRowData], message_id: str, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, @@ -304,7 +304,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, message_id: str, confidence: Optional[float] = None, ) -> "NDRadio": @@ -427,7 +427,7 @@ def from_common( annotation: Union[ ClassificationAnnotation, VideoClassificationAnnotation ], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: classify_obj = cls.lookup_classification(annotation) if classify_obj is None: @@ -475,7 +475,7 @@ def to_common( def from_common( cls, annotation: Union[PromptClassificationAnnotation], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: return NDPromptText.from_common( str(annotation._uuid), diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 7039ae834..ffaefb4d7 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -14,7 +14,6 @@ ) from ...annotation_types.video import VideoObjectAnnotation, VideoMaskAnnotation from ...annotation_types.collection import LabelCollection, LabelGenerator -from ...annotation_types.data import DicomData, ImageData, TextData, VideoData from ...annotation_types.data.generic_data_row_data import GenericDataRowData from ...annotation_types.label import Label from ...annotation_types.ner import TextEntity, ConversationEntity @@ -214,46 +213,9 @@ def _generate_annotations( yield Label( annotations=annotations, - data=self._infer_media_type(group.data_row, annotations), + 
data=GenericDataRowData, ) - def _infer_media_type( - self, - data_row: DataRow, - annotations: List[ - Union[ - TextEntity, - ConversationEntity, - VideoClassificationAnnotation, - DICOMObjectAnnotation, - VideoObjectAnnotation, - ObjectAnnotation, - ClassificationAnnotation, - ScalarMetric, - ConfusionMatrixMetric, - ] - ], - ) -> Union[TextData, VideoData, ImageData]: - if len(annotations) == 0: - raise ValueError("Missing annotations while inferring media type") - - types = {type(annotation) for annotation in annotations} - data = GenericDataRowData - if (TextEntity in types) or (ConversationEntity in types): - data = TextData - elif ( - VideoClassificationAnnotation in types - or VideoObjectAnnotation in types - ): - data = VideoData - elif DICOMObjectAnnotation in types: - data = DicomData - - if data_row.id: - return data(uid=data_row.id) - else: - return data(global_key=data_row.global_key) - @staticmethod def _get_consecutive_frames( frames_indices: List[int], diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py index b28e575cf..f8b522ab5 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py @@ -1,6 +1,6 @@ from typing import Optional, Union, Type -from labelbox.data.annotation_types.data import ImageData, TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.serialization.ndjson.base import DataRow, NDJsonBase from labelbox.data.annotation_types.metrics.scalar import ( ScalarMetric, @@ -51,7 +51,7 @@ def to_common(self) -> ConfusionMatrixMetric: @classmethod def from_common( - cls, metric: ConfusionMatrixMetric, data: Union[TextData, ImageData] + cls, metric: ConfusionMatrixMetric, data: GenericDataRowData ) -> "NDConfusionMatrixMetric": return cls( uuid=metric.extra.get("uuid"), @@ -83,7 +83,7 @@ def to_common(self) -> ScalarMetric: @classmethod def from_common( - cls, metric: ScalarMetric, data: Union[TextData, ImageData] + cls, metric: ScalarMetric, data: GenericDataRowData ) -> "NDScalarMetric": return cls( uuid=metric.extra.get("uuid"), @@ -107,7 +107,7 @@ def to_common( def from_common( cls, annotation: Union[ScalarMetric, ConfusionMatrixMetric], - data: Union[TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDScalarMetric, NDConfusionMatrixMetric]: obj = cls.lookup_object(annotation) return obj.from_common(annotation, data) diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py index 74d185f45..b2dcfb5b4 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py @@ -9,6 +9,7 @@ MessageRankingTask, MessageEvaluationTaskAnnotation, ) +from ...annotation_types import GenericDataRowData class MessageTaskData(_CamelCaseMixin): @@ -35,7 +36,7 @@ def to_common(self) -> MessageEvaluationTaskAnnotation: def from_common( cls, annotation: MessageEvaluationTaskAnnotation, - data: Any, # Union[ImageData, TextData], + data: GenericDataRowData, ) -> "NDMessageTask": return cls( uuid=str(annotation._uuid), diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py index 91abface6..1bcba7a89 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py +++ 
b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Tuple, Union, Optional import base64 +from labelbox.data.annotation_types.data.raster import MaskData from labelbox.data.annotation_types.ner.conversation_entity import ( ConversationEntity, ) @@ -21,9 +22,9 @@ from PIL import Image from labelbox.data.annotation_types import feature -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData -from ...annotation_types.data import ImageData, TextData, MaskData +from ...annotation_types.data import GenericDataRowData from ...annotation_types.ner import ( DocumentEntity, DocumentTextSelection, @@ -96,7 +97,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPoint": @@ -161,7 +162,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDLine": @@ -245,7 +246,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPolygon": @@ -282,7 +283,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -329,7 +330,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -508,7 +509,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[VideoObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -545,7 +546,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[DICOMObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -601,7 +602,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDMask": @@ -706,7 +707,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDTextEntity": @@ -743,7 +744,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDDocumentEntity": @@ -778,7 +779,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = 
None,
        custom_metrics: Optional[List[CustomMetric]] = None,
    ) -> "NDConversationEntity":
@@ -836,7 +837,7 @@ def from_common(
             List[List[VideoObjectAnnotation]],
             VideoMaskAnnotation,
         ],
-        data: Union[ImageData, TextData],
+        data: GenericDataRowData,
     ) -> Union[
         NDLine,
         NDPoint,
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py
index 94c8e9879..d558ac244 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py
@@ -1,7 +1,7 @@
 from typing import Union
 from pydantic import BaseModel
 from .base import NDAnnotation, DataRow
-from ...annotation_types.data import ImageData, TextData
+from ...annotation_types.data import GenericDataRowData
 from ...annotation_types.relationship import RelationshipAnnotation
 from ...annotation_types.relationship import Relationship
 from .objects import NDObjectType
@@ -40,7 +40,7 @@ def to_common(
     def from_common(
         cls,
         annotation: RelationshipAnnotation,
-        data: Union[ImageData, TextData],
+        data: GenericDataRowData,
     ) -> "NDRelationship":
         relationship = annotation.value
         return cls(
diff --git a/libs/labelbox/src/labelbox/utils.py b/libs/labelbox/src/labelbox/utils.py
index c76ce188f..dcf51be82 100644
--- a/libs/labelbox/src/labelbox/utils.py
+++ b/libs/labelbox/src/labelbox/utils.py
@@ -87,8 +87,8 @@ class _NoCoercionMixin:
     when serializing the object.

     Example:
-        class ConversationData(BaseData, _NoCoercionMixin):
-            class_name: Literal["ConversationData"] = "ConversationData"
+        class GenericDataRowData(BaseData, _NoCoercionMixin):
+            class_name: Literal["GenericDataRowData"] = "GenericDataRowData"

diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py
index 9deddc3c8..8b2627776 100644
--- a/libs/labelbox/tests/data/annotation_types/test_collection.py
+++ b/libs/labelbox/tests/data/annotation_types/test_collection.py
@@ -7,19 +7,21 @@
 from labelbox.data.annotation_types import (
     LabelGenerator,
     ObjectAnnotation,
-    ImageData,
-    MaskData,
     Line,
     Mask,
     Point,
     Label,
+    GenericDataRowData,
+    MaskData,
 )
 from labelbox import OntologyBuilder, Tool


 @pytest.fixture
 def list_of_labels():
-    return [Label(data=ImageData(url="http://someurl")) for _ in range(5)]
+    return [
+        Label(data=GenericDataRowData(url="http://someurl")) for _ in range(5)
+    ]


 @pytest.fixture
@@ -73,7 +75,7 @@ def test_conversion(list_of_labels):
 def test_adding_schema_ids():
     name = "line_feature"
     label = Label(
-        data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)),
+        data=GenericDataRowData(uid="123456"),
         annotations=[
             ObjectAnnotation(
                 value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]),
@@ -93,7 +95,7 @@ def test_adding_schema_ids():

 def test_adding_urls(signer):
     label = Label(
-        data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)),
+        data=GenericDataRowData(uid="12345"),
         annotations=[],
     )
     uuid = str(uuid4())
@@ -106,7 +108,7 @@ def test_adding_urls(signer):
 def test_adding_to_dataset(signer):
     dataset = FakeDataset()
     label = Label(
-        data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)),
+        data=GenericDataRowData(uid="12345"),
         annotations=[],
     )
     uuid = str(uuid4())
@@ -121,7 +123,7 @@ def test_adding_to_dataset(signer):

 def test_adding_to_masks(signer):
     label = Label(
-        data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)),
+        data=GenericDataRowData(uid="12345"),
         annotations=[
ObjectAnnotation( name="1234", diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 59f568c75..fb78916f4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -4,7 +4,7 @@ ClassificationAnswer, Radio, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -13,9 +13,8 @@ def test_serialization_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -41,9 +40,8 @@ def test_serialization_min(): def test_serialization_with_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -131,9 +129,8 @@ def test_serialization_with_classification(): def test_serialization_with_classification_double_nested(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -224,9 +221,8 @@ def test_serialization_with_classification_double_nested(): def test_serialization_with_classification_double_nested_2(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index d67acb9c3..4d615658c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -11,7 +11,6 @@ Mask, Label, ObjectAnnotation, - ImageData, MaskData, ) from labelbox.types import Rectangle, Polygon, Point @@ -262,7 +261,7 @@ def test_mask_from_arr(): ), ) ], - data=ImageData(uid="0" * 25), + data=GenericDataRowData(uid="0" * 25), ) res = next(NDJsonConverter.serialize([label])) res.pop("uuid") diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 4458e335c..ec57f0528 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -3,7 +3,7 @@ ClassificationAnswer, ) from labelbox.data.annotation_types.classification.classification import Radio -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -12,9 +12,8 @@ def test_serialization_with_radio_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -43,9 +42,8 @@ def test_serialization_with_radio_min(): def test_serialization_with_radio_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - 
data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 21db389cb..28eba07bd 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -2,7 +2,7 @@ from labelbox.data.annotation_types.classification.classification import ( Text, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -11,9 +11,8 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( From 30819db04d66b7809a680659c4c88823a4bdb9b7 Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:06:51 -0500 Subject: [PATCH 07/44] [PLT-1463] Removed ND deserialize from some unit test part 1 (#1804) --- .../classification_import_global_key.json | 54 -- ...conversation_entity_import_global_key.json | 25 - .../data/assets/ndjson/image_import.json | 779 +---------------- .../ndjson/image_import_global_key.json | 823 ------------------ .../assets/ndjson/image_import_name_only.json | 810 +---------------- .../ndjson/metric_import_global_key.json | 10 - .../assets/ndjson/pdf_import_global_key.json | 155 ---- .../ndjson/polyline_import_global_key.json | 36 - .../ndjson/text_entity_import_global_key.json | 26 - .../ndjson/video_import_global_key.json | 166 ---- .../serialization/ndjson/test_checklist.py | 26 - .../ndjson/test_classification.py | 108 ++- .../serialization/ndjson/test_conversation.py | 71 +- .../serialization/ndjson/test_data_gen.py | 80 +- .../data/serialization/ndjson/test_dicom.py | 26 - .../serialization/ndjson/test_document.py | 294 ++++++- .../ndjson/test_export_video_objects.py | 32 +- .../serialization/ndjson/test_free_text.py | 26 - .../serialization/ndjson/test_global_key.py | 125 +-- .../data/serialization/ndjson/test_image.py | 203 ++++- 20 files changed, 769 insertions(+), 3106 deletions(-) delete mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json deleted file mode 100644 index 4de15e217..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json +++ /dev/null @@ -1,54 +0,0 @@ -[ - { - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - 
"confidence": 0.8, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" - }, - { - "answer": [ - { - "schemaId": "ckrb1sfl8099e0y919v260awv", - "confidence": 0.82, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } - ], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json deleted file mode 100644 index 83a95e5bf..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json +++ /dev/null @@ -1,25 +0,0 @@ -[{ - "location": { - "start": 67, - "end": 128 - }, - "messageId": "some-message-id", - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] -}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 91563b8ae..75fe36e44 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,16 +8,17 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - } + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + }, + "classifications": [] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -28,20 +29,17 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + }, + "classifications": [] }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -50,762 +48,39 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, 
- { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 
907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 + "x": 10.0, + "y": 20.0 }, { - "x": 1119, - "y": 934 + "x": 15.0, + "y": 20.0 }, { - "x": 1118, - "y": 935 + "x": 20.0, + "y": 25.0 }, { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json deleted file mode 100644 index 591e40cf6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json +++ /dev/null @@ -1,823 +0,0 @@ -[ - { - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "schemaId": "ckrazcueb16og0z6609jj7y3y", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.851, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - { - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "schemaId": "ckrazcuec16ok0z66f956apb7", - "dataRow": { - "globalKey": 
"05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.834, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } - }, - { - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "schemaId": "ckrazcuec16oi0z66dzrd8pfl", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.986, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "polygon": [ - { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - 
"y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 - } - ] - }, - { - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "schemaId": "ckrazcuec16om0z66bhhh4tp7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "point": { - "x": 2122, - 
"y": 1457 - } - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 82be4cdab..466a03594 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,826 +1,86 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "box a", + "name": "ckrazcueb16og0z6609jj7y3y", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "confidence": 0.854, + "classifications": [], + "confidence": 0.851, "customMetrics": [ { "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.7 + "value": 0.4 } - ] + ], + "bbox": { + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "mask a", + "name": "ckrazcuec16ok0z66f956apb7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - }, - "confidence": 0.685, + "classifications": [], + "confidence": 0.834, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 - }, - { - "name": "customMetric2", - "value": 0.9 + "value": 0.3 } - ] + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + } }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "polygon a", + "name": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.71, + "confidence": 0.986, "customMetrics": [ { "name": "customMetric1", - "value": 0.1 + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 
927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 + "x": 10.0, + "y": 20.0 }, { - "x": 1099, - "y": 911 + "x": 15.0, + "y": 20.0 }, { - "x": 1100, - "y": 911 + "x": 20.0, + "y": 25.0 }, { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, 
- "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "point a", + "name": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.77, - "customMetrics": [ - { - "name": "customMetric2", - "value": 1.2 - } - ], "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json deleted file mode 100644 index 31be5a4c7..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "aggregation": "ARITHMETIC_MEAN", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "metricValue": 0.1 - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json deleted file mode 100644 index f4b4894f6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json +++ /dev/null @@ -1,155 +0,0 @@ -[{ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 4, - "unit": "POINTS", - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 162.73, - "left": 32.45, - "height": 388.16999999999996, - "width": 101.66000000000001 - } -}, { - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "bbox": { - "top": 223.26, - "left": 251.42, - "height": 457.03999999999996, - "width": 186.78 - } -}, { - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 6, - "unit": "POINTS", - "confidence": 0.99, - 
"customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 32.52, - "left": 218.17, - "height": 231.73, - "width": 110.56000000000003 - } -}, { - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "confidence": 0.89, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 117.39, - "left": 4.25, - "height": 456.9200000000001, - "width": 164.83 - } -}, { - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 8, - "unit": "POINTS", - "bbox": { - "top": 82.13, - "left": 217.28, - "height": 279.76, - "width": 82.43000000000004 - } -}, { - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 3, - "unit": "POINTS", - "bbox": { - "top": 298.12, - "left": 83.34, - "height": 203.83000000000004, - "width": 0.37999999999999545 - } -}, -{ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "named_entity", - "classifications": [], - "textSelections": [ - { - "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - "tokenIds": [ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c" - ], - "page": 1 - } - ] -} -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json deleted file mode 100644 index d6a9eecbd..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json +++ /dev/null @@ -1,36 +0,0 @@ -[ - { - "line": [ - { - "x": 2534.353, - "y": 249.471 - }, - { - "x": 2429.492, - "y": 182.092 - }, - { - "x": 2294.322, - "y": 221.962 - } - ], - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-line", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.58, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json deleted file mode 100644 index 1f26d8dc8..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "location": { - "start": 67, - "end": 128 - }, - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - 
"classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json deleted file mode 100644 index 11e0753d9..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json +++ /dev/null @@ -1,166 +0,0 @@ -[{ - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" - }, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{ - "start": 30, - "end": 35 - }, { - "start": 50, - "end": 51 - }] -}, { - "answer": [{ - "schemaId": "ckrb1sfl8099e0y919v260awv" - }], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{ - "start": 0, - "end": 5 - }] -}, { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" -}, { - "classifications": [], - "schemaId": - "cl5islwg200gfci6g0oitaypu", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [{ - "keyframes": [{ - "frame": 1, - "line": [{ - "x": 10.0, - "y": 10.0 - }, { - "x": 100.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }, { - "frame": 5, - "line": [{ - "x": 15.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 8, - "line": [{ - "x": 100.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [{ - "keyframes": [{ - "frame": 1, - "point": { - "x": 10.0, - "y": 10.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 5, - "point": { - "x": 50.0, - "y": 50.0 - }, - "classifications": [] - }, { - "frame": 10, - "point": { - "x": 10.0, - "y": 50.0 - }, - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5iw0roz00lwci6g5jni62vs", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [{ - "keyframes": [{ - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0 - }, - "classifications": [] - }, { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0 - }, - "classifications": [] - }] - }] -}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 0bc3c8924..59f568c75 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -37,13 +37,6 @@ def 
test_serialization_min(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - for i, annotation in enumerate(res.annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_serialization_with_classification(): label = Label( @@ -134,12 +127,6 @@ def test_serialization_with_classification(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - assert label.model_dump(exclude_none=True) == label.model_dump( - exclude_none=True - ) - def test_serialization_with_classification_double_nested(): label = Label( @@ -233,13 +220,6 @@ def test_serialization_with_classification_double_nested(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - res.annotations[0].extra.pop("uuid") - assert label.model_dump(exclude_none=True) == label.model_dump( - exclude_none=True - ) - def test_serialization_with_classification_double_nested_2(): label = Label( @@ -330,9 +310,3 @@ def test_serialization_with_classification_double_nested_2(): res = next(serialized) res.pop("uuid") assert res == expected - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - assert label.model_dump(exclude_none=True) == label.model_dump( - exclude_none=True - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py index 8dcb17f0b..82adce99c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py @@ -1,15 +1,73 @@ import json +from labelbox.data.annotation_types.classification.classification import ( + Checklist, + Radio, + Text, +) +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ClassificationAnnotation, + ClassificationAnswer, +) +from labelbox.data.mixins import CustomMetric + def test_classification(): with open( "tests/data/assets/ndjson/classification_import.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + label = Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.8, + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.82, + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"}, + value=Text(answer="a value"), + ), + ], + ) + 
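+    # With deserialize removed, the expected Label is constructed explicitly
+    # above; NDJsonConverter.serialize takes an iterable of Labels and yields
+    # one NDJSON-style dict per annotation, so the list output compares
+    # directly against the loaded JSON fixture.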
+ res = list(NDJsonConverter.serialize([label])) assert res == data @@ -18,6 +76,48 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + label = Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + name="classification a", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="choice 1", + ), + ), + ), + ClassificationAnnotation( + name="classification b", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.945, + name="choice 2", + ) + ], + ), + ), + ClassificationAnnotation( + name="classification c", + extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index f7da9181b..561f9ce86 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,8 +1,12 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -99,25 +103,62 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - deserialized_label = list(NDJsonConverter().deserialize(ndjson)) - deserialized_label[0].annotations[0].extra.pop("uuid") - assert deserialized_label[0].model_dump(exclude_none=True) == label[ - 0 - ].model_dump(exclude_none=True) +def test_conversation_entity_import(): + with open( + "tests/data/assets/ndjson/conversation_entity_import.json", "r" + ) as file: + data = json.load(file) -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/conversation_entity_import.json", + label = lb_types.Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) + assert res == data + + +def test_conversation_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - ], -) -def test_conversation_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - 
res = list(NDJsonConverter.serialize(res)) + label = lb_types.Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, extra={}, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py index 333c00250..999e1bda5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py @@ -1,67 +1,29 @@ -from copy import copy -import pytest import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import ( - NDDicomSegments, - NDDicomSegment, - NDDicomLine, -) - -""" -Data gen prompt test data -""" - -prompt_text_annotation = lb_types.PromptClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - name="test", - value=lb_types.PromptText( - answer="the answer to the text questions right here" - ), -) - -prompt_text_ndjson = { - "answer": "the answer to the text questions right here", - "name": "test", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, -} - -data_gen_label = lb_types.Label( - data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - annotations=[prompt_text_annotation], -) - -""" -Prompt annotation test -""" def test_serialize_label(): - serialized_label = next(NDJsonConverter().serialize([data_gen_label])) - # Remove uuid field since this is a random value that can not be specified also meant for relationships - del serialized_label["uuid"] - assert serialized_label == prompt_text_ndjson - - -def test_deserialize_label(): - deserialized_label = next( - NDJsonConverter().deserialize([prompt_text_ndjson]) + prompt_text_annotation = lb_types.PromptClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + name="test", + extra={"uuid": "test"}, + value=lb_types.PromptText( + answer="the answer to the text questions right here" + ), ) - if hasattr(deserialized_label.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to match - deserialized_label.annotations[0].extra = {} - assert deserialized_label.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + prompt_text_ndjson = { + "answer": "the answer to the text questions right here", + "name": "test", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "test", + } + + data_gen_label = lb_types.Label( + data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + annotations=[prompt_text_annotation], + ) + serialized_label = next(NDJsonConverter().serialize([data_gen_label])) -def test_serialize_deserialize_label(): - serialized = list(NDJsonConverter.serialize([data_gen_label])) - deserialized = next(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to match - deserialized.annotations[0].extra = {} - assert deserialized.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + assert serialized_label == 
prompt_text_ndjson diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 633214367..762891aa2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,6 +1,5 @@ from copy import copy import pytest -import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -181,28 +180,3 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson - - -@pytest.mark.parametrize("label, ndjson", labels_ndjsons) -def test_deserialize_label(label, ndjson): - deserialized_label = next(NDJsonConverter().deserialize([ndjson])) - if hasattr(deserialized_label.annotations[0], "extra"): - deserialized_label.annotations[0].extra = {} - for i, annotation in enumerate(deserialized_label.annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value - - -@pytest.mark.parametrize("label", labels) -def test_serialize_deserialize_label(label): - serialized = list(NDJsonConverter.serialize([label])) - deserialized = list(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized[0].annotations[0], "extra"): - deserialized[0].annotations[0].extra = {} - for i, annotation in enumerate(deserialized[0].annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index 5fe6a9789..a0897ad9f 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,6 +1,19 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + RectangleUnit, + Point, + DocumentRectangle, + DocumentEntity, + DocumentTextSelection, +) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -53,10 +66,144 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + 
value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.89, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_with_name_only(): @@ -65,26 +212,135 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id=None, + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + 
), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.74, + name="boxy", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson - - -def test_pdf_bbox_deserialize(): - deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) - deserialized[0].annotations[0].extra = {} - assert ( - deserialized[0].annotations[0].value - == bbox_labels[0].annotations[0].value - ) - assert ( - deserialized[0].annotations[0].name - == bbox_labels[0].annotations[0].name - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 4adcd9935..1ab678cde 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,8 +9,6 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=VideoData( uid="cklr9mr4m5iao0rb6cvxu4qbn", - file_path=None, - frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -22,6 +20,7 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", + "uuid": "b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, value=Rectangle( extra={}, @@ -588,31 +587,4 @@ def test_serialize_video_objects(): serialized_labels = NDJsonConverter.serialize([label]) label = 
next(serialized_labels) - manual_label = video_serialized_bbox_label() - - for key in label.keys(): - # ignore uuid because we randomize if there was none - if key != "uuid": - assert label[key] == manual_label[key] - - assert len(label["segments"]) == 2 - assert len(label["segments"][0]["keyframes"]) == 2 - assert len(label["segments"][1]["keyframes"]) == 4 - - # #converts back only the keyframes. should be the sum of all prev segments - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - assert len(label.annotations) == 6 - - -def test_confidence_is_ignored(): - label = video_bbox_label() - serialized_labels = NDJsonConverter.serialize([label]) - label = next(serialized_labels) - label["confidence"] = 0.453 - label["segments"][0]["confidence"] = 0.453 - - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - for annotation in label.annotations: - assert annotation.confidence is None + assert label == video_serialized_bbox_label() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 84c017497..349be13a8 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -34,16 +34,6 @@ def test_serialization(): assert res["answer"] == "text_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "text_answer" - assert annotation_value.confidence == 0.5 - def test_nested_serialization(): label = Label( @@ -102,19 +92,3 @@ def test_nested_serialization(): assert sub_classification["name"] == "nested answer" assert sub_classification["answer"] == "nested answer" assert sub_classification["confidence"] == 0.7 - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - answer = annotation.value.answer[0] - assert answer.confidence == 0.9 - assert answer.name == "first_answer" - - classification_answer = answer.classifications[0].value.answer - assert classification_answer.confidence == 0.8 - assert classification_answer.name == "first_sub_radio_answer" - - sub_classification_answer = classification_answer.classifications[0].value - assert type(sub_classification_answer) is Text - assert sub_classification_answer.answer == "nested answer" - assert sub_classification_answer.confidence == 0.7 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py index 2b3fa7f8c..d104a691e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py @@ -1,73 +1,74 @@ -import json -import pytest - -from labelbox.data.serialization.ndjson.classification import NDRadio - +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import NDLine - - -def round_dict(data): - if isinstance(data, dict): - for key in data: - if isinstance(data[key], float): - data[key] = int(data[key]) - elif isinstance(data[key], dict): - data[key] = 
round_dict(data[key]) - elif isinstance(data[key], (list, tuple)): - data[key] = [round_dict(r) for r in data[key]] +from labelbox.types import ( + Label, + ClassificationAnnotation, + Radio, + ClassificationAnswer, +) - return data +def test_generic_data_row_global_key_included(): + expected = [ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + } + ] -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/classification_import_global_key.json", - "tests/data/assets/ndjson/metric_import_global_key.json", - "tests/data/assets/ndjson/polyline_import_global_key.json", - "tests/data/assets/ndjson/text_entity_import_global_key.json", - "tests/data/assets/ndjson/conversation_entity_import_global_key.json", - ], -) -def test_many_types(filename: str): - with open(filename, "r") as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == data - f.close() + label = Label( + data=GenericDataRowData( + global_key="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ) + res = list(NDJsonConverter.serialize([label])) -def test_image(): - with open( - "tests/data/assets/ndjson/image_import_global_key.json", "r" - ) as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() + assert res == expected -def test_pdf(): - with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() +def test_dict_data_row_global_key_included(): + expected = [ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + } + ] + label = Label( + data={ + "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct", + }, + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ) -def test_video(): - with open( - "tests/data/assets/ndjson/video_import_global_key.json", "r" - ) as f: - data = json.load(f) + res = list(NDJsonConverter.serialize([label])) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] - f.close() + assert res == expected diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index 1729e1f46..d67acb9c3 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,4 +1,8 @@ import json +from 
labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -10,6 +14,7 @@ ImageData, MaskData, ) +from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -29,12 +34,74 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_image_with_name_only(): @@ -43,11 +110,74 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + name="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + name="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + 
CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + name="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + name="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_mask(): @@ -57,10 +187,11 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], + "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -68,16 +199,54 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [255, 0, 0], + "colorRGB": (255, 0, 0), }, + "classifications": [], }, ] - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) + mask_numpy = mask_numpy.astype(np.uint8) + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.8, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Mask( + mask=MaskData(arr=mask_numpy), + color=(1, 1, 1), + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + extra={}, + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=(255, 0, 0), + ), + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + + assert res == data def test_mask_from_arr(): From 761b1e9643cc3b0d02ee762f950051e7e0d3e6e5 Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:08:20 -0500 Subject: [PATCH 08/44] [PLT-1463] Removed ND deserialize from some unit test part 2 (#1815) --- .../data/serialization/ndjson/test_metric.py | 170 +++- .../data/serialization/ndjson/test_mmc.py | 125 ++- .../ndjson/test_ndlabel_subclass_matching.py | 19 - .../data/serialization/ndjson/test_nested.py | 236 ++++- .../serialization/ndjson/test_polyline.py | 82 +- .../data/serialization/ndjson/test_radio.py | 16 - .../serialization/ndjson/test_rectangle.py | 43 +- .../serialization/ndjson/test_relationship.py | 151 ++- 
.../data/serialization/ndjson/test_text.py | 10 - .../serialization/ndjson/test_text_entity.py | 69 +- .../data/serialization/ndjson/test_video.py | 868 +++++++++++++++++- 11 files changed, 1593 insertions(+), 196 deletions(-) delete mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 45c5c67bf..40e098405 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,38 +1,166 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.annotation_types.metrics.confusion_matrix import ( + ConfusionMatrixMetric, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ScalarMetric, + ScalarMetricAggregation, + ConfusionMatrixAggregation, +) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert reserialized == data + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + assert res == data def test_custom_scalar_metric(): - with open( - "tests/data/assets/ndjson/custom_scalar_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: 0.1, 0.2: 0.5}, + "metricName": "custom_iou", + "aggregation": "SUM", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value=0.1, + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value={"0.1": 0.1, "0.2": 0.5}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert res == data def test_custom_confusion_matrix_metric(): - with open( - 
"tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (1, 1, 2, 3), + "metricName": "50%_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (0, 1, 2, 5), + "metricName": "50%_iou", + "featureName": "sample_class", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, + "metricName": "50%_iou", + "aggregation": "CONFUSION_MATRIX", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ConfusionMatrixMetric( + value=(1, 1, 2, 3), + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value=(0, 1, 2, 5), + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert data == res diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 69594ff73..202f793fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,32 +1,125 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest from labelbox.data.serialization import NDJsonConverter +from labelbox.types import ( + Label, + MessageEvaluationTaskAnnotation, + MessageSingleSelectionTask, + MessageMultiSelectionTask, + MessageInfo, + OrderedMessageInfo, + MessageRankingTask, +) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - deserialized = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(deserialized)) + labels = [ + Label( + data=GenericDataRowData( + uid="cnjencjencjfencvj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="single-selection", + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + value=MessageSingleSelectionTask( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + parent_message_id="clxfznjb800073b6v43ppx9ca", + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cfcerfvergerfefj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="multi-selection", + extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, + value=MessageMultiSelectionTask( + 
parent_message_id="clxfznjb800073b6v43ppx9ca", + selected_messages=[ + MessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + ) + ], + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=2, + ), + ], + ), + ) + ], + ), + ] - assert data == reserialized + res = list(NDJsonConverter.serialize(labels)) + assert res == data -def test_mesage_ranking_task_wrong_order_serialization(): - with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: - data = json.load(file) - - some_ranking_task = next( - task - for task in data - if task["messageEvaluationTask"]["format"] == "message-ranking" - ) - some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ - "order" - ] = 3 +def test_mesage_ranking_task_wrong_order_serialization(): with pytest.raises(ValueError): - list(NDJsonConverter.deserialize([some_ranking_task])) + ( + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={ + "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" + }, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + ], + ), + ) + ], + ), + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py deleted file mode 100644 index 790bd87b3..000000000 --- a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from labelbox.data.serialization.ndjson.label import NDLabel -from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle -import pytest - - -def test_bad_annotation_input(): - data = [{"test": 3}] - with pytest.raises(ValueError): - NDLabel(**{"annotations": data}) - - -def test_correct_annotation_input(): - with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: - data = json.load(f) - assert isinstance( - NDLabel(**{"annotations": [data[0]]}).annotations[0], - NDDocumentRectangle, - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index e0f0df0e6..3633c9cbe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,13 +1,135 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Rectangle, + Point, + ClassificationAnnotation, + Radio, + ClassificationAnswer, + Text, + Checklist, +) 
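+
+# Each test below builds the expected Label tree by hand (bbox tools with
+# nested radio/checklist/text classifications) and asserts that
+# NDJsonConverter.serialize alone reproduces the NDJSON fixture.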
def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.34, + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "5d03213e-4408-456c-9eca-cf0723202961", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.894, + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={}, + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -16,6 +138,112 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="box a", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.811, + name="first answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box b", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification b", + 
value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.815, + name="second answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification c", + value=Checklist( + answer=[ + ClassificationAnswer( + name="third answer", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="a string", + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index 97d48a14e..cd11d97fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,18 +1,76 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ObjectAnnotation, Point, Line, Label -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/polyline_without_confidence_import.json", - "tests/data/assets/ndjson/polyline_import.json", - ], -) -def test_polyline_import(filename: str): - with open(filename, "r") as file: +def test_polyline_import_with_confidence(): + with open( + "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" + ) as file: + data = json.load(file) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data + + +def test_polyline_import_without_confidence(): + with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.58, + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 
bd80f9267..4458e335c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,4 +1,3 @@ -import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -40,14 +39,6 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - for i, annotation in enumerate(res.annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_serialization_with_radio_classification(): label = Label( @@ -101,10 +92,3 @@ def test_serialization_with_radio_classification(): res = next(serialized) res.pop("uuid") assert res == expected - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - res.annotations[0].extra.pop("uuid") - assert res.annotations[0].model_dump( - exclude_none=True - ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 66630dbb5..0e42ab152 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,6 +1,10 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -8,8 +12,26 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="bbox", + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + }, + value=Rectangle( + start=Point(x=38.0, y=28.0), + end=Point(x=81.0, y=69.0), + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -39,8 +61,6 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, }, ) @@ -48,8 +68,9 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + + assert res == data def test_rectangle_mixed_start_end_points(): @@ -76,17 +97,13 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, - }, + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + assert res == data def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py 
b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index f33719035..235b66957 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,16 +1,135 @@ import json -from uuid import uuid4 -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Point, + Rectangle, + RelationshipAnnotation, + Relationship, +) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) + res = [ + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + extra={}, + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl-DIFFERENT", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + ] res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -44,29 +163,3 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] - - -def test_relationship_nonexistent_object(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - relationship_annotation = data[2] - source_uuid = relationship_annotation["relationship"]["source"] - target_uuid = str(uuid4()) - relationship_annotation["relationship"]["target"] = target_uuid - error_msg 
= f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" - - with pytest.raises(ValueError, match=error_msg): - list(NDJsonConverter.deserialize(data)) - - -def test_relationship_duplicate_uuids(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - source, target = data[0], data[1] - target["uuid"] = source["uuid"] - error_msg = f"UUID '{source['uuid']}' is not unique" - - with pytest.raises(AssertionError, match=error_msg): - list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index d5e81c51a..21db389cb 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,7 +1,5 @@ from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( - ClassificationAnswer, - Radio, Text, ) from labelbox.data.annotation_types.data.text import TextData @@ -34,11 +32,3 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index 3e856f001..fb93f15d4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,21 +1,68 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, TextEntity + + +def test_text_entity_import(): + with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: + data = json.load(file) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/text_entity_import.json", +def test_text_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - ], -) -def test_text_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-text-entity", + 
feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index c7a6535c4..4fba5c2ca 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,10 +1,10 @@ import json -from labelbox.client import Client from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, Radio, + Text, ) from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.geometry.line import Line @@ -13,8 +13,10 @@ from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.video import VideoObjectAnnotation -from labelbox import parser +from labelbox.data.annotation_types.video import ( + VideoClassificationAnnotation, + VideoObjectAnnotation, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter from operator import itemgetter @@ -24,15 +26,275 @@ def test_video(): with open("tests/data/assets/ndjson/video_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + 
feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=1, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=5, + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + 
segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_name_only(): @@ -40,16 +302,274 @@ def test_video_name_only(): "tests/data/assets/ndjson/video_import_name_only.json", "r" ) as file: data = json.load(file) - - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=1, + ), + VideoClassificationAnnotation( + name="question 2", + 
extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=5, + ), + ClassificationAnnotation( + name="question 3", + extra={"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_classification_global_subclassifications(): @@ -67,7 +587,6 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( - name="checklist", answer=[ ClassificationAnswer( 
name="first_checklist_answer", @@ -94,7 +613,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = nested_checklist_annotation_ndjson = { + expected_second_annotation = { "name": "nested_checklist_question", "answer": [ { @@ -116,12 +635,6 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - assert annotation.name == label.annotations[i].name - def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -287,14 +800,6 @@ def test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_video_classification_point(): bbox_annotation = [ @@ -445,13 +950,6 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - def test_video_classification_frameline(): bbox_annotation = [ @@ -619,9 +1117,289 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value + +[ + { + "answer": "a value", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 5, "start": 0}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5islwg200gfci6g0oitaypu", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + { + "classifications": [], + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + } + ] + }, + ], + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "segments": [ 
+ { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + } + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + }, + { + "classifications": [], + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + }, + ] + }, + ], + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "segments": [ + { + "keyframes": [ + { + "bbox": { + "height": 100.0, + "left": 5.0, + "top": 10.0, + "width": 150.0, + }, + "classifications": [], + "frame": 1, + }, + { + "bbox": { + "height": 50.0, + "left": 5.0, + "top": 30.0, + "width": 150.0, + }, + "classifications": [], + "frame": 5, + }, + ] + }, + { + "keyframes": [ + { + "bbox": { + "height": 400.0, + "left": 200.0, + "top": 300.0, + "width": 150.0, + }, + "classifications": [], + "frame": 10, + } + ] + }, + ], + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + }, +] + +[ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{"start": 0, "end": 5}], + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "classifications": [], + "schemaId": "cl5islwg200gfci6g0oitaypu", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + { + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + } + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + "classifications": [], + } + ] + }, + { + "keyframes": [ + { + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + "classifications": [], + }, + { + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + "classifications": [], + }, + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0, + }, + "classifications": [], + }, + { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0, + }, + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0, + }, + "classifications": 
[], + } + ] + }, + ], + }, +] From 379171a7a50c9b472962718414ccf1b6b69a4a33 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 17 Sep 2024 12:10:48 -0700 Subject: [PATCH 09/44] [PLT-1274] Vb/deprecate bulkimportrequest plt 1274 (#1821) --- libs/labelbox/src/labelbox/__init__.py | 1 - libs/labelbox/src/labelbox/orm/model.py | 1 - .../labelbox/schema/bulk_import_request.py | 1004 ----------------- libs/labelbox/src/labelbox/schema/enums.py | 25 - libs/labelbox/src/labelbox/schema/project.py | 119 +- .../test_bulk_import_request.py | 258 ----- .../test_ndjson_validation.py | 36 - 7 files changed, 6 insertions(+), 1438 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py delete mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 5b5ac1f67..f9b82b422 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -6,7 +6,6 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig -from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( MALPredictionImport, MEAPredictionImport, diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py index 84dcac774..1f3ee1d86 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -386,7 +386,6 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] - BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py deleted file mode 100644 index 8e11f3261..000000000 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ /dev/null @@ -1,1004 +0,0 @@ -import json -import time -from uuid import UUID, uuid4 -import functools - -import logging -from pathlib import Path -from google.api_core import retry -from labelbox import parser -import requests -from pydantic import ( - ValidationError, - BaseModel, - Field, - field_validator, - model_validator, - ConfigDict, - StringConstraints, -) -from typing_extensions import Literal, Annotated -from typing import ( - Any, - List, - Optional, - BinaryIO, - Dict, - Iterable, - Tuple, - Union, - Type, - Set, - TYPE_CHECKING, -) - -from labelbox import exceptions as lb_exceptions -from labelbox import utils -from labelbox.orm import query -from labelbox.orm.db_object import DbObject -from labelbox.orm.model import Relationship -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.serialization import serialize_labels -from labelbox.orm.model import Field as lb_Field - -if TYPE_CHECKING: - from labelbox import Project - from labelbox.types import Label - -NDJSON_MIME_TYPE = "application/x-ndjson" -logger = logging.getLogger(__name__) - -# TODO: Deprecate this library in place of labelimport and malprediction import library. 
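-#
-# An illustrative sketch of that replacement path (not part of this module;
-# it assumes a `client`, a `project_id`, and an ndjson-style `predictions`
-# list are already in hand):
-#
-#     from labelbox.schema.annotation_import import MALPredictionImport
-#
-#     upload_job = MALPredictionImport.create_from_objects(
-#         client=client,
-#         project_id=project_id,
-#         name="mal-import-1",
-#         predictions=predictions,
-#     )
-#     upload_job.wait_until_done()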
- - -def _determinants(parent_cls: Any) -> List[str]: - return [ - k - for k, v in parent_cls.model_fields.items() - if v.json_schema_extra and "determinant" in v.json_schema_extra - ] - - -def _make_file_name(project_id: str, name: str) -> str: - return f"{project_id}__{name}.ndjson" - - -# TODO(gszpak): move it to client.py -def _make_request_data( - project_id: str, name: str, content_length: int, file_name: str -) -> dict: - query_str = """mutation createBulkImportRequestFromFilePyApi( - $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - filePayload: { - file: $file, - contentLength: $contentLength - } - }) { - %s - } - } - """ % query.results_query_part(BulkImportRequest) - variables = { - "projectId": project_id, - "name": name, - "file": None, - "contentLength": content_length, - } - operations = json.dumps({"variables": variables, "query": query_str}) - - return { - "operations": operations, - "map": (None, json.dumps({file_name: ["variables.file"]})), - } - - -def _send_create_file_command( - client, - request_data: dict, - file_name: str, - file_data: Tuple[str, Union[bytes, BinaryIO], str], -) -> dict: - response = client.execute(data=request_data, files={file_name: file_data}) - - if not response.get("createBulkImportRequest", None): - raise lb_exceptions.LabelboxError( - "Failed to create BulkImportRequest, message: %s" - % response.get("errors", None) - or response.get("error", None) - ) - - return response - - -class BulkImportRequest(DbObject): - """Represents the import job when importing annotations. - - Attributes: - name (str) - state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) - input_file_url (str): URL to your web-hosted NDJSON file - error_file_url (str): NDJSON that contains error messages for failed annotations - status_file_url (str): NDJSON that contains status for each annotation - created_at (datetime): UTC timestamp for date BulkImportRequest was created - - project (Relationship): `ToOne` relationship to Project - created_by (Relationship): `ToOne` relationship to User - """ - - name = lb_Field.String("name") - state = lb_Field.Enum(BulkImportRequestState, "state") - input_file_url = lb_Field.String("input_file_url") - error_file_url = lb_Field.String("error_file_url") - status_file_url = lb_Field.String("status_file_url") - created_at = lb_Field.DateTime("created_at") - - project = Relationship.ToOne("Project") - created_by = Relationship.ToOne("User", False, "created_by") - - @property - def inputs(self) -> List[Dict[str, Any]]: - """ - Inputs for each individual annotation uploaded. - This should match the ndjson annotations that you have uploaded. - - Returns: - Uploaded ndjson. - - * This information will expire after 24 hours. - """ - return self._fetch_remote_ndjson(self.input_file_url) - - @property - def errors(self) -> List[Dict[str, Any]]: - """ - Errors for each individual annotation uploaded. This is a subset of statuses - - Returns: - List of dicts containing error messages. Empty list means there were no errors - See `BulkImportRequest.statuses` for more details. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.error_file_url) - - @property - def statuses(self) -> List[Dict[str, Any]]: - """ - Status for each individual annotation uploaded. - - Returns: - A status for each annotation if the upload is done running. 
- See below table for more details - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - Field - - Description - * - uuid - - Specifies the annotation for the status row. - * - dataRow - - JSON object containing the Labelbox data row ID for the annotation. - * - status - - Indicates SUCCESS or FAILURE. - * - errors - - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.status_file_url) - - @functools.lru_cache() - def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: - """ - Fetches the remote ndjson file and caches the results. - - Args: - url (str): Can be any url pointing to an ndjson file. - Returns: - ndjson as a list of dicts. - """ - response = requests.get(url) - response.raise_for_status() - return parser.loads(response.text) - - def refresh(self) -> None: - """Synchronizes values of all fields with the database.""" - query_str, params = query.get_single(BulkImportRequest, self.uid) - res = self.client.execute(query_str, params) - res = res[utils.camel_case(BulkImportRequest.type_name())] - self._set_field_values(res) - - def wait_till_done(self, sleep_time_seconds: int = 5) -> None: - self.wait_until_done(sleep_time_seconds) - - def wait_until_done(self, sleep_time_seconds: int = 5) -> None: - """Blocks import job until certain conditions are met. - - Blocks until the BulkImportRequest.state changes either to - `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, - periodically refreshing object's state. - - Args: - sleep_time_seconds (str): a time to block between subsequent API calls - """ - while self.state == BulkImportRequestState.RUNNING: - logger.info(f"Sleeping for {sleep_time_seconds} seconds...") - time.sleep(sleep_time_seconds) - self.__exponential_backoff_refresh() - - @retry.Retry( - predicate=retry.if_exception_type( - lb_exceptions.ApiLimitError, - lb_exceptions.TimeoutError, - lb_exceptions.NetworkError, - ) - ) - def __exponential_backoff_refresh(self) -> None: - self.refresh() - - @classmethod - def from_name( - cls, client, project_id: str, name: str - ) -> "BulkImportRequest": - """Fetches existing BulkImportRequest. - - Args: - client (Client): a Labelbox client - project_id (str): BulkImportRequest's project id - name (str): name of BulkImportRequest - Returns: - BulkImportRequest object - - """ - query_str = """query getBulkImportRequestPyApi( - $projectId: ID!, $name: String!) { - bulkImportRequest(where: { - projectId: $projectId, - name: $name - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name} - response = client.execute(query_str, params=params) - return cls(client, response["bulkImportRequest"]) - - @classmethod - def create_from_url( - cls, client, project_id: str, name: str, url: str, validate=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a publicly accessible URL - to an ndjson file with predictions. 
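-
-        Example (an illustrative sketch; the project id and URL below are
-        placeholders):
-
-            >>> bulk_import_request = BulkImportRequest.create_from_url(
-            ...     client,
-            ...     project_id="<project_id>",
-            ...     name="url-import",
-            ...     url="https://example.com/predictions.ndjson",
-            ... )
-            >>> bulk_import_request.wait_until_done()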
- - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - url (str): publicly accessible URL pointing to ndjson file containing predictions - validate (bool): a flag indicating if there should be a validation - if `url` is valid ndjson - Returns: - BulkImportRequest object - """ - if validate: - logger.warn( - "Validation is turned on. The file will be downloaded locally and processed before uploading." - ) - res = requests.get(url) - data = parser.loads(res.text) - _validate_ndjson(data, client.get_project(project_id)) - - query_str = """mutation createBulkImportRequestPyApi( - $projectId: ID!, $name: String!, $fileUrl: String!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - fileUrl: $fileUrl - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name, "fileUrl": url} - bulk_import_request_response = client.execute(query_str, params=params) - return cls( - client, bulk_import_request_response["createBulkImportRequest"] - ) - - @classmethod - def create_from_objects( - cls, - client, - project_id: str, - name: str, - predictions: Union[Iterable[Dict], Iterable["Label"]], - validate=True, - ) -> "BulkImportRequest": - """ - Creates a `BulkImportRequest` from an iterable of dictionaries. - - Conforms to JSON predictions format, e.g.: - ``{ - "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", - "schemaId": "ckappz7d700gn0zbocmqkwd9i", - "dataRow": { - "id": "ck1s02fqxm8fi0757f0e6qtdc" - }, - "bbox": { - "top": 48, - "left": 58, - "height": 865, - "width": 1512 - } - }`` - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - predictions (Iterable[dict]): iterable of dictionaries representing predictions - validate (bool): a flag indicating if there should be a validation - if `predictions` is valid ndjson - Returns: - BulkImportRequest object - """ - if not isinstance(predictions, list): - raise TypeError( - f"annotations must be in a form of Iterable. Found {type(predictions)}" - ) - ndjson_predictions = serialize_labels(predictions) - - if validate: - _validate_ndjson(ndjson_predictions, client.get_project(project_id)) - - data_str = parser.dumps(ndjson_predictions) - if not data_str: - raise ValueError("annotations cannot be empty") - - data = data_str.encode("utf-8") - file_name = _make_file_name(project_id, name) - request_data = _make_request_data( - project_id, name, len(data_str), file_name - ) - file_data = (file_name, data, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, - request_data=request_data, - file_name=file_name, - file_data=file_data, - ) - - return cls(client, response_data["createBulkImportRequest"]) - - @classmethod - def create_from_local_file( - cls, client, project_id: str, name: str, file: Path, validate_file=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a local ndjson file with predictions. 
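-
-        Example (an illustrative sketch; the project id and file path below
-        are placeholders):
-
-            >>> from pathlib import Path
-            >>> bulk_import_request = BulkImportRequest.create_from_local_file(
-            ...     client,
-            ...     project_id="<project_id>",
-            ...     name="local-file-import",
-            ...     file=Path("predictions.ndjson"),
-            ... )
-            >>> bulk_import_request.wait_until_done()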
-
-        Args:
-            client (Client): a Labelbox client
-            project_id (str): id of project for which predictions will be imported
-            name (str): name of BulkImportRequest
-            file (Path): local ndjson file with predictions
-            validate_file (bool): a flag indicating if there should be a validation
-                if `file` is a valid ndjson file
-        Returns:
-            BulkImportRequest object
-
-        """
-        file_name = _make_file_name(project_id, name)
-        content_length = file.stat().st_size
-        request_data = _make_request_data(
-            project_id, name, content_length, file_name
-        )
-
-        with file.open("rb") as f:
-            if validate_file:
-                reader = parser.reader(f)
-                # ensure that the underlying json load call is valid
-                # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
-                # by iterating through the file so we only store
-                # each line in memory rather than the entire file
-                try:
-                    _validate_ndjson(reader, client.get_project(project_id))
-                except ValueError:
-                    raise ValueError(f"{file} is not a valid ndjson file")
-            else:
-                f.seek(0)
-            file_data = (file.name, f, NDJSON_MIME_TYPE)
-            response_data = _send_create_file_command(
-                client, request_data, file_name, file_data
-            )
-        return cls(client, response_data["createBulkImportRequest"])
-
-    def delete(self) -> None:
-        """Deletes the import job and also any annotations created by this import.
-
-        Returns:
-            None
-        """
-        id_param = "bulk_request_id"
-        query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) {
-            deleteBulkImportRequest(where: {id: $%s}) {
-                id
-                name
-            }
-        }""" % (id_param, id_param)
-        self.client.execute(query_str, {id_param: self.uid})
-
-
-def _validate_ndjson(
-    lines: Iterable[Dict[str, Any]], project: "Project"
-) -> None:
-    """
-    Client-side validation of an ndjson object.
-
-    Does not guarantee that an upload will succeed, for the following reasons:
-        * We are not checking the data row types, which will cause the following errors to slip through:
-            * Missing frame indices will not cause an error for videos
-        * Uploaded annotations for the wrong data type will pass (e.g. entity on images)
-        * We are not checking bounds of an asset (e.g. frame index, image height, text location)
-
-    Args:
-        lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines
-        project (Project): the project for which predictions will be imported
-
-    Raises:
-        MALValidationError: Raised for invalid NDJSON
-        UuidError: Raised for a duplicate UUID in the upload
-    """
-    feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas(
-        project.ontology()
-    )
-    uids: Set[str] = set()
-    for idx, line in enumerate(lines):
-        try:
-            annotation = NDAnnotation(**line)
-            annotation.validate_instance(
-                feature_schemas_by_id, feature_schemas_by_name
-            )
-            uuid = str(annotation.uuid)
-            if uuid in uids:
-                raise lb_exceptions.UuidError(
-                    f"{uuid} already used in this import job, "
-                    "must be unique for the project."
-                )
-            uids.add(uuid)
-        except (ValidationError, ValueError, TypeError, KeyError) as e:
-            raise lb_exceptions.MALValidationError(
-                f"Invalid NDJson on line {idx}"
-            ) from e
-
-
-# The rest of this file contains objects for MAL validation
-def parse_classification(tool):
-    """
-    Parses a classification from an ontology. Only radio, checklist, and text are supported for MAL.
-
-    Args:
-        tool (dict)
-
-    Returns:
-        dict
-    """
-    if tool["type"] in ["radio", "checklist"]:
-        option_schema_ids = [r["featureSchemaId"] for r in tool["options"]]
-        option_names = [r["value"] for r in tool["options"]]
-        return {
-            "tool": tool["type"],
-            "featureSchemaId": tool["featureSchemaId"],
-            "name": tool["name"],
-            "options": [*option_schema_ids, *option_names],
-        }
-    elif tool["type"] == "text":
-        return {
-            "tool": tool["type"],
-            "name": tool["name"],
-            "featureSchemaId": tool["featureSchemaId"],
-        }
-
-
-def get_mal_schemas(ontology):
-    """
-    Converts a project ontology to a dict for easier lookup during ndjson validation
-
-    Args:
-        ontology (Ontology)
-    Returns:
-        Dict, Dict : Useful for looking up a tool from a given feature schema id or name
-    """
-
-    valid_feature_schemas_by_schema_id = {}
-    valid_feature_schemas_by_name = {}
-    for tool in ontology.normalized["tools"]:
-        classifications = [
-            parse_classification(classification_tool)
-            for classification_tool in tool["classifications"]
-        ]
-        classifications_by_schema_id = {
-            v["featureSchemaId"]: v for v in classifications
-        }
-        classifications_by_name = {v["name"]: v for v in classifications}
-        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = {
-            "tool": tool["tool"],
-            "classificationsBySchemaId": classifications_by_schema_id,
-            "classificationsByName": classifications_by_name,
-            "name": tool["name"],
-        }
-        valid_feature_schemas_by_name[tool["name"]] = {
-            "tool": tool["tool"],
-            "classificationsBySchemaId": classifications_by_schema_id,
-            "classificationsByName": classifications_by_name,
-            "name": tool["name"],
-        }
-    for tool in ontology.normalized["classifications"]:
-        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = (
-            parse_classification(tool)
-        )
-        valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool)
-    return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name
-
-
-class Bbox(BaseModel):
-    top: float
-    left: float
-    height: float
-    width: float
-
-
-class Point(BaseModel):
-    x: float
-    y: float
-
-
-class FrameLocation(BaseModel):
-    end: int
-    start: int
-
-
-class VideoSupported(BaseModel):
-    # Note that frames are only allowed as top-level inferences for video
-    frames: Optional[List[FrameLocation]] = None
-
-
-# Base class for a special kind of union.
-class SpecialUnion:
-    def __new__(cls, **kwargs):
-        return cls.build(kwargs)
-
-    @classmethod
-    def __get_validators__(cls):
-        yield cls.build
-
-    @classmethod
-    def get_union_types(cls):
-        if not issubclass(cls, SpecialUnion):
-            raise TypeError(f"{cls} must be a subclass of SpecialUnion")
-
-        union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")]
-        if len(union_types) < 1:
-            raise TypeError(
-                f"Class {cls} should inherit from a union of objects to build"
-            )
-        if len(union_types) > 1:
-            raise TypeError(
-                f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}"
-            )
-        return union_types[0].__args__[0].__args__
-
-    @classmethod
-    def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase":
-        """
-        Checks through all objects in the union to see which matches the input data.
-
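-        Example (an illustrative sketch; the uuid and data row id below are
-        reused from the `create_from_objects` docstring and are placeholders):
-
-            >>> # A string-valued "answer" dispatches to NDText; a dict-valued
-            >>> # "answer" dispatches to NDRadio (see the tie-break below).
-            >>> classification = NDClassification(
-            ...     answer="free form text",
-            ...     uuid="9fd9a92e-2560-4e77-81d4-b2e955800092",
-            ...     dataRow={"id": "ck1s02fqxm8fi0757f0e6qtdc"},
-            ...     name="text question",
-            ... )
-            >>> type(classification).__name__
-            'NDText'
-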
- Args: - data (Union[dict, BaseModel]) : The data for constructing one of the objects in the union - raises: - KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion - ValidationError: Error while trying to construct a specific object in the union - - """ - if isinstance(data, BaseModel): - data = data.model_dump() - - top_level_fields = [] - max_match = 0 - matched = None - - for type_ in cls.get_union_types(): - determinate_fields = _determinants(type_) - top_level_fields.append(determinate_fields) - matches = sum([val in determinate_fields for val in data]) - if matches == len(determinate_fields) and matches > max_match: - max_match = matches - matched = type_ - - if matched is not None: - # These two have the exact same top level keys - if matched in [NDRadio, NDText]: - if isinstance(data["answer"], dict): - matched = NDRadio - elif isinstance(data["answer"], str): - matched = NDText - else: - raise TypeError( - f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict" - ) - return matched(**data) - else: - raise KeyError( - f"Invalid annotation. Must have one of the following keys : {top_level_fields}. Found {data}." - ) - - @classmethod - def schema(cls): - results = {"definitions": {}} - for cl in cls.get_union_types(): - schema = cl.schema() - results["definitions"].update(schema.pop("definitions")) - results[cl.__name__] = schema - return results - - -class DataRow(BaseModel): - id: str - - -class NDFeatureSchema(BaseModel): - schemaId: Optional[str] = None - name: Optional[str] = None - - @model_validator(mode="after") - def most_set_one(self): - if self.schemaId is None and self.name is None: - raise ValueError( - "Must set either schemaId or name for all feature schemas" - ) - return self - - -class NDBase(NDFeatureSchema): - ontology_type: str - uuid: UUID - dataRow: DataRow - model_config = ConfigDict(extra="forbid") - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - if self.name: - if self.name not in valid_feature_schemas_by_name: - raise ValueError( - f"Name {self.name} is not valid for the provided project's ontology." - ) - - if ( - self.ontology_type - != valid_feature_schemas_by_name[self.name]["tool"] - ): - raise ValueError( - f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}" - ) - - if self.schemaId: - if self.schemaId not in valid_feature_schemas_by_id: - raise ValueError( - f"Schema id {self.schemaId} is not valid for the provided project's ontology." 
-                )
-
-            if (
-                self.ontology_type
-                != valid_feature_schemas_by_id[self.schemaId]["tool"]
-            ):
-                raise ValueError(
-                    f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}"
-                )
-
-    def validate_instance(
-        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
-    ):
-        self.validate_feature_schemas(
-            valid_feature_schemas_by_id, valid_feature_schemas_by_name
-        )
-
-
-###### Classifications ######
-
-
-class NDText(NDBase):
-    ontology_type: Literal["text"] = "text"
-    answer: str = Field(json_schema_extra={"determinant": True})
-    # No feature schema to check
-
-
-class NDChecklist(VideoSupported, NDBase):
-    ontology_type: Literal["checklist"] = "checklist"
-    answers: List[NDFeatureSchema] = Field(
-        json_schema_extra={"determinant": True}
-    )
-
-    @field_validator("answers", mode="before")
-    def validate_answers(cls, value, field):
-        # constr not working with mypy.
-        if not len(value):
-            raise ValueError("Checklist answers should not be empty")
-        return value
-
-    def validate_feature_schemas(
-        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
-    ):
-        # Test top level feature schema for this tool
-        super(NDChecklist, self).validate_feature_schemas(
-            valid_feature_schemas_by_id, valid_feature_schemas_by_name
-        )
-        # Test the feature schemas provided to the answer field
-        if len(
-            set([answer.name or answer.schemaId for answer in self.answers])
-        ) != len(self.answers):
-            raise ValueError(
-                f"Duplicated featureSchema found for checklist {self.uuid}"
-            )
-        for answer in self.answers:
-            options = (
-                valid_feature_schemas_by_name[self.name]["options"]
-                if self.name
-                else valid_feature_schemas_by_id[self.schemaId]["options"]
-            )
-            if answer.name not in options and answer.schemaId not in options:
-                raise ValueError(
-                    f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {answer}"
-                )
-
-
-class NDRadio(VideoSupported, NDBase):
-    ontology_type: Literal["radio"] = "radio"
-    answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True})
-
-    def validate_feature_schemas(
-        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
-    ):
-        super(NDRadio, self).validate_feature_schemas(
-            valid_feature_schemas_by_id, valid_feature_schemas_by_name
-        )
-        options = (
-            valid_feature_schemas_by_name[self.name]["options"]
-            if self.name
-            else valid_feature_schemas_by_id[self.schemaId]["options"]
-        )
-        if (
-            self.answer.name not in options
-            and self.answer.schemaId not in options
-        ):
-            raise ValueError(
-                f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {self.answer.name or self.answer.schemaId}"
-            )
-
-
-# A union with custom construction logic to improve error messages
-class NDClassification(
    SpecialUnion,
    Type[Union[NDText, NDRadio, NDChecklist]],  # type: ignore
-): ...
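-
-# An illustrative sketch (not part of the module): because "answers" is the
-# determinant field of NDChecklist, a payload carrying a list of answers is
-# dispatched there by SpecialUnion.build. The ids below reuse the docstring
-# example above; the option names are placeholders.
-#
-#     NDClassification(
-#         answers=[{"name": "option_a"}, {"name": "option_b"}],
-#         uuid="9fd9a92e-2560-4e77-81d4-b2e955800092",
-#         dataRow={"id": "ck1s02fqxm8fi0757f0e6qtdc"},
-#         name="checklist question",
-#     )  # -> NDChecklist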
- - -###### Tools ###### - - -class NDBaseTool(NDBase): - classifications: List[NDClassification] = [] - - # This is independent of our problem - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDBaseTool, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - for classification in self.classifications: - classification.validate_feature_schemas( - valid_feature_schemas_by_name[self.name][ - "classificationsBySchemaId" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsBySchemaId" - ], - valid_feature_schemas_by_name[self.name][ - "classificationsByName" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsByName" - ], - ) - - @field_validator("classifications", mode="before") - def validate_subclasses(cls, value, field): - # Create uuid and datarow id so we don't have to define classification objects twice - # This is caused by the fact that we require these ids for top level classifications but not for subclasses - results = [] - dummy_id = "child".center(25, "_") - for row in value: - results.append( - {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())} - ) - return results - - -class NDPolygon(NDBaseTool): - ontology_type: Literal["polygon"] = "polygon" - polygon: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("polygon") - def is_geom_valid(cls, v): - if len(v) < 3: - raise ValueError( - f"A polygon must have at least 3 points to be valid. Found {v}" - ) - return v - - -class NDPolyline(NDBaseTool): - ontology_type: Literal["line"] = "line" - line: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("line") - def is_geom_valid(cls, v): - if len(v) < 2: - raise ValueError( - f"A line must have at least 2 points to be valid. Found {v}" - ) - return v - - -class NDRectangle(NDBaseTool): - ontology_type: Literal["rectangle"] = "rectangle" - bbox: Bbox = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class NDPoint(NDBaseTool): - ontology_type: Literal["point"] = "point" - point: Point = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class EntityLocation(BaseModel): - start: int - end: int - - -class NDTextEntity(NDBaseTool): - ontology_type: Literal["named-entity"] = "named-entity" - location: EntityLocation = Field(json_schema_extra={"determinant": True}) - - @field_validator("location") - def is_valid_location(cls, v): - if isinstance(v, BaseModel): - v = v.model_dump() - - if len(v) < 2: - raise ValueError( - f"A text location must have both a start and an end. Found {v}" - ) - if v["start"] < 0: - raise ValueError(f"Text location must be positive. Found {v}") - if v["start"] > v["end"]: - raise ValueError( - f"Text start location must be less than or equal to end. Found {v}" - ) - return v - - -class RLEMaskFeatures(BaseModel): - counts: List[int] - size: List[int] - - @field_validator("counts") - def validate_counts(cls, counts): - if not all([count >= 0 for count in counts]): - raise ValueError( - "Found negative value for counts. They should all be zero or positive" - ) - return counts - - @field_validator("size") - def validate_size(cls, size): - if len(size) != 2: - raise ValueError( - f"Mask `size` should have two ints representing height and width. 
Found: {size}" - ) - if not all([count > 0 for count in size]): - raise ValueError( - f"Mask `size` should be a positive int. Found: {size}" - ) - return size - - -class PNGMaskFeatures(BaseModel): - # base64 encoded png bytes - png: str - - -class URIMaskFeatures(BaseModel): - instanceURI: str - colorRGB: Union[List[int], Tuple[int, int, int]] - - @field_validator("colorRGB") - def validate_color(cls, colorRGB): - # Does the dtype matter? Can it be a float? - if not isinstance(colorRGB, (tuple, list)): - raise ValueError( - f"Received color that is not a list or tuple. Found: {colorRGB}" - ) - elif len(colorRGB) != 3: - raise ValueError( - f"Must provide RGB values for segmentation colors. Found: {colorRGB}" - ) - elif not all([0 <= color <= 255 for color in colorRGB]): - raise ValueError( - f"All rgb colors must be between 0 and 255. Found: {colorRGB}" - ) - return colorRGB - - -class NDMask(NDBaseTool): - ontology_type: Literal["superpixel"] = "superpixel" - mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field( - json_schema_extra={"determinant": True} - ) - - -# A union with custom construction logic to improve error messages -class NDTool( - SpecialUnion, - Type[ # type: ignore - Union[ - NDMask, - NDTextEntity, - NDPoint, - NDRectangle, - NDPolyline, - NDPolygon, - ] - ], -): ... - - -class NDAnnotation( - SpecialUnion, - Type[Union[NDTool, NDClassification]], # type: ignore -): - @classmethod - def build(cls: Any, data) -> "NDBase": - if not isinstance(data, dict): - raise ValueError("value must be dict") - errors = [] - for cl in cls.get_union_types(): - try: - return cl(**data) - except KeyError as e: - errors.append(f"{cl.__name__}: {e}") - - raise ValueError( - "Unable to construct any annotation.\n{}".format("\n".join(errors)) - ) - - @classmethod - def schema(cls): - data = {"definitions": {}} - for type_ in cls.get_union_types(): - schema_ = type_.schema() - data["definitions"].update(schema_.pop("definitions")) - data[type_.__name__] = schema_ - return data diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py index 6f8aebc58..dfc87c8a4 100644 --- a/libs/labelbox/src/labelbox/schema/enums.py +++ b/libs/labelbox/src/labelbox/schema/enums.py @@ -1,31 +1,6 @@ from enum import Enum -class BulkImportRequestState(Enum): - """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). - - If you are not using MEA, continue using BulkImportRequest. - AnnotationImports are in beta and will change soon. - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - State - - Description - * - RUNNING - - Indicates that the import job is not done yet. - * - FAILED - - Indicates the import job failed. Check `BulkImportRequest.errors` for more information - * - FINISHED - - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information - """ - - RUNNING = "RUNNING" - FAILED = "FAILED" - FINISHED = "FINISHED" - - class AnnotationImportState(Enum): """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). 
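With BulkImportRequestState removed, the equivalent status check now goes through AnnotationImportState on the newer import classes. A minimal sketch of a migrated status check, assuming a hypothetical project id and ndjson-style prediction dicts prepared elsewhere:

    from labelbox import Client
    from labelbox.schema.annotation_import import LabelImport
    from labelbox.schema.enums import AnnotationImportState

    client = Client(api_key="<api_key>")  # hypothetical credentials
    predictions = []  # ndjson-style annotation dicts prepared elsewhere
    # LabelImport replaces project.upload_annotations() / BulkImportRequest
    label_import = LabelImport.create_from_objects(
        client=client,
        project_id="<project_id>",  # hypothetical project id
        name="migrated-import",
        labels=predictions,
    )
    label_import.wait_until_done()
    assert label_import.state == AnnotationImportState.FINISHED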
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f8876f7c4..88153e48f 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -5,36 +5,29 @@ import warnings from collections import namedtuple from datetime import datetime, timezone -from pathlib import Path from typing import ( TYPE_CHECKING, Any, Dict, - Iterable, List, Optional, Tuple, - TypeVar, Union, overload, ) -from urllib.parse import urlparse from labelbox.schema.labeling_service import ( LabelingService, LabelingServiceStatus, ) from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -import requests -from labelbox import parser from labelbox import utils from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, - ResourceConflict, ResourceNotFoundError, ) from labelbox.orm import query @@ -46,7 +39,6 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, - validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -63,7 +55,6 @@ from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, - OntologyKind, UploadType, ) from labelbox.schema.project_overview import ( @@ -72,7 +63,7 @@ ) if TYPE_CHECKING: - from labelbox import BulkImportRequest + pass DataRowPriority = int @@ -579,7 +570,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - f"This function has only been tested to work with the Editor front end. Found %s", + "This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -814,7 +805,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - f"Batch exceeds max size, break into smaller batches" + "Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1088,7 +1079,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Batch was not created successfully: " + "Batch was not created successfully: " + json.dumps(task.errors) ) @@ -1436,7 +1427,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Priority was not updated successfully: " + "Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1488,33 +1479,6 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] - def bulk_import_requests(self) -> PaginatedCollection: - """Returns bulk import request objects which are used in model-assisted labeling. - These are returned with the oldest first, and most recent last. - """ - - id_param = "project_id" - query_str = """query ListAllImportRequestsPyApi($%s: ID!) 
{ - bulkImportRequests ( - where: { projectId: $%s } - skip: %%d - first: %%d - ) { - %s - } - }""" % ( - id_param, - id_param, - query.results_query_part(Entity.BulkImportRequest), - ) - return PaginatedCollection( - self.client, - query_str, - {id_param: str(self.uid)}, - ["bulkImportRequests"], - Entity.BulkImportRequest, - ) - def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1629,7 +1593,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Data rows were not moved successfully: " + "Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1639,77 +1603,6 @@ def _wait_for_task(self, task_id: str) -> Task: return task - def upload_annotations( - self, - name: str, - annotations: Union[str, Path, Iterable[Dict]], - validate: bool = False, - ) -> "BulkImportRequest": # type: ignore - """Uploads annotations to a new Editor project. - - Args: - name (str): name of the BulkImportRequest job - annotations (str or Path or Iterable): - url that is publicly accessible by Labelbox containing an - ndjson file - OR local path to an ndjson file - OR iterable of annotation rows - validate (bool): - Whether or not to validate the payload before uploading. - Returns: - BulkImportRequest - """ - - if isinstance(annotations, str) or isinstance(annotations, Path): - - def _is_url_valid(url: Union[str, Path]) -> bool: - """Verifies that the given string is a valid url. - - Args: - url: string to be checked - Returns: - True if the given url is valid otherwise False - - """ - if isinstance(url, Path): - return False - parsed = urlparse(url) - return bool(parsed.scheme) and bool(parsed.netloc) - - if _is_url_valid(annotations): - return Entity.BulkImportRequest.create_from_url( - client=self.client, - project_id=self.uid, - name=name, - url=str(annotations), - validate=validate, - ) - else: - path = Path(annotations) - if not path.exists(): - raise FileNotFoundError( - f"{annotations} is not a valid url nor existing local file" - ) - return Entity.BulkImportRequest.create_from_local_file( - client=self.client, - project_id=self.uid, - name=name, - file=path, - validate_file=validate, - ) - elif isinstance(annotations, Iterable): - return Entity.BulkImportRequest.create_from_objects( - client=self.client, - project_id=self.uid, - name=name, - predictions=annotations, # type: ignore - validate=validate, - ) - else: - raise ValueError( - f"Invalid annotations given of type: {type(annotations)}" - ) - def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py deleted file mode 100644 index 9abae1422..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ /dev/null @@ -1,258 +0,0 @@ -from unittest.mock import patch -import uuid -from labelbox import parser, Project -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -import pytest -import random -from labelbox.data.annotation_types.annotation import ObjectAnnotation -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - ClassificationAnnotation, - ClassificationAnswer, - Radio, -) -from labelbox.data.annotation_types.data.video import VideoData -from 
labelbox.data.annotation_types.geometry.point import Point -from labelbox.data.annotation_types.geometry.rectangle import ( - Rectangle, - RectangleUnit, -) -from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.data.text import TextData -from labelbox.data.annotation_types.ner import ( - DocumentEntity, - DocumentTextSelection, -) -from labelbox.data.annotation_types.video import VideoObjectAnnotation - -from labelbox.data.serialization import NDJsonConverter -from labelbox.exceptions import MALValidationError, UuidError -from labelbox.schema.bulk_import_request import BulkImportRequest -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.annotation_import import LabelImport, MALPredictionImport -from labelbox.schema.media_type import MediaType - -""" -- Here we only want to check that the uploads are calling the validation -- Then with unit tests we can check the types of errors raised -""" -# TODO: remove library once bulk import requests are removed - - -@pytest.mark.order(1) -def test_create_from_url(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_file(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - with pytest.raises(MALValidationError): - module_project.upload_annotations( - name=name, annotations=url, validate=True - ) - # Schema ids shouldn't match - - -def test_create_from_objects( - module_project: Project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_create_from_label_objects( - module_project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - labels = list(NDJsonConverter.deserialize(predictions)) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=labels - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - normalized_predictions = list(NDJsonConverter.serialize(labels)) - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, normalized_predictions - ) - - -def test_create_from_local_file( - tmp_path, predictions, module_project, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - file_name = f"{name}.ndjson" - file_path = tmp_path / 
file_name - with file_path.open("w") as f: - parser.dump(predictions, f) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=str(file_path), validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_get(client, module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - bulk_import_request = BulkImportRequest.from_name( - client, project_id=module_project.uid, name=name - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_ndjson(tmp_path, module_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - -def test_validate_ndjson_uuid(tmp_path, module_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - uid = str(uuid.uuid4()) - repeat_uuid[0]["uuid"] = uid - repeat_uuid[1]["uuid"] = uid - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - -@pytest.mark.skip( - "Slow test and uses a deprecated api endpoint for annotation imports" -) -def test_wait_till_done(rectangle_inference, project): - name = str(uuid.uuid4()) - url = project.client.upload_data( - content=parser.dumps(rectangle_inference), sign=True - ) - bulk_import_request = project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert len(bulk_import_request.inputs) == 1 - bulk_import_request.wait_until_done() - assert bulk_import_request.state == BulkImportRequestState.FINISHED - - # Check that the status files are being returned as expected - assert len(bulk_import_request.errors) == 0 - assert len(bulk_import_request.inputs) == 1 - assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] - assert len(bulk_import_request.statuses) == 1 - assert bulk_import_request.statuses[0]["status"] == "SUCCESS" - assert ( - bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] - ) - - -def test_project_bulk_import_requests(module_project, predictions): - result = module_project.bulk_import_requests() - assert len(list(result)) == 0 - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - 
name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - result = module_project.bulk_import_requests() - assert len(list(result)) == 3 - - -def test_delete(module_project, predictions): - name = str(uuid.uuid4()) - - bulk_import_requests = module_project.bulk_import_requests() - [ - bulk_import_request.delete() - for bulk_import_request in bulk_import_requests - ] - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 1 - - bulk_import_request.delete() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index a0df559fc..0ec742333 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,8 +1,6 @@ from labelbox.schema.media_type import MediaType -from labelbox.schema.project import Project import pytest -from labelbox import parser from pytest_cases import parametrize, fixture_ref from labelbox.exceptions import MALValidationError @@ -12,7 +10,6 @@ NDMask, NDPolygon, NDPolyline, - NDRadio, NDRectangle, NDText, NDTextEntity, @@ -191,39 +188,6 @@ def test_missing_feature_schema(module_project, rectangle_inference): _validate_ndjson([pred], module_project) -def test_validate_ndjson(tmp_path, configured_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - configured_project.upload_annotations( - name="name", annotations=str(file_path), validate=True - ) - - -def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - repeat_uuid[0]["uuid"] = "test_uuid" - repeat_uuid[1]["uuid"] = "test_uuid" - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - @pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) def test_video_upload(video_checklist_inference, configured_project): pred = video_checklist_inference[0].copy() From 5e87f4e767e847475c7bd7546f25dee68e42609c Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 Sep 2024 20:41:28 -0500 Subject: [PATCH 10/44] [PLT-1463] Remove deserialize completely (#1818) --- .github/workflows/lbox-develop.yml | 4 +- .github/workflows/python-package-develop.yml | 4 +- .../data/serialization/ndjson/base.py | 12 -- .../serialization/ndjson/classification.py | 13 +- .../data/serialization/ndjson/converter.py | 14 -- .../data/serialization/ndjson/label.py | 64 +----- .../data/serialization/ndjson/metric.py | 5 +- .../labelbox/data/serialization/ndjson/mmc.py | 4 +- 
.../data/serialization/ndjson/objects.py | 49 ++--- .../data/serialization/ndjson/relationship.py | 4 +- libs/labelbox/src/labelbox/schema/__init__.py | 1 - libs/labelbox/src/labelbox/schema/project.py | 3 +- .../data/annotation_import/test_data_types.py | 83 -------- .../test_generic_data_types.py | 72 ------- .../test_mea_prediction_import.py | 70 ++++++- .../test_ndjson_validation.py | 194 ------------------ .../ndjson/test_generic_data_row_data.py | 79 +++++++ 17 files changed, 177 insertions(+), 498 deletions(-) delete mode 100644 libs/labelbox/tests/data/annotation_import/test_data_types.py delete mode 100644 libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py create mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py diff --git a/.github/workflows/lbox-develop.yml b/.github/workflows/lbox-develop.yml index ba1e4f34e..efb642f66 100644 --- a/.github/workflows/lbox-develop.yml +++ b/.github/workflows/lbox-develop.yml @@ -2,9 +2,9 @@ name: LBox Develop on: push: - branches: [develop] + branches: [develop, v6] pull_request: - branches: [develop] + branches: [develop, v6] concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/.github/workflows/python-package-develop.yml b/.github/workflows/python-package-develop.yml index 05eff5dc4..769d04c74 100644 --- a/.github/workflows/python-package-develop.yml +++ b/.github/workflows/python-package-develop.yml @@ -2,9 +2,9 @@ name: Labelbox Python SDK Staging (Develop) on: push: - branches: [develop] + branches: [develop, v6] pull_request: - branches: [develop] + branches: [develop, v6] concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py index 75ebdc100..d8d8cd36f 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py @@ -8,18 +8,6 @@ from ....annotated_types import Cuid -subclass_registry = {} - - -class _SubclassRegistryBase(BaseModel): - model_config = ConfigDict(extra="allow") - - def __init_subclass__(cls, **kwargs): - super().__init_subclass__(**kwargs) - if cls.__name__ != "NDAnnotation": - with threading.Lock(): - subclass_registry[cls.__name__] = cls - class DataRow(_CamelCaseMixin): id: Optional[str] = None diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py index b127c4a90..2c3215265 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py @@ -30,7 +30,6 @@ model_serializer, ) from pydantic.alias_generators import to_camel -from .base import _SubclassRegistryBase class NDAnswer(ConfidenceMixin, CustomMetricsMixin): @@ -224,7 +223,7 @@ def from_common( # ====== End of subclasses -class NDText(NDAnnotation, NDTextSubclass, _SubclassRegistryBase): +class NDText(NDAnnotation, NDTextSubclass): @classmethod def from_common( cls, @@ -249,9 +248,7 @@ def from_common( ) -class NDChecklist( - NDAnnotation, NDChecklistSubclass, VideoSupported, _SubclassRegistryBase -): +class NDChecklist(NDAnnotation, NDChecklistSubclass, VideoSupported): @model_serializer(mode="wrap") def serialize_model(self, handler): res = handler(self) @@ -298,9 +295,7 @@ def from_common( ) -class NDRadio( - NDAnnotation, NDRadioSubclass, VideoSupported, 
_SubclassRegistryBase -): +class NDRadio(NDAnnotation, NDRadioSubclass, VideoSupported): @classmethod def from_common( cls, @@ -343,7 +338,7 @@ def serialize_model(self, handler): return res -class NDPromptText(NDAnnotation, NDPromptTextSubclass, _SubclassRegistryBase): +class NDPromptText(NDAnnotation, NDPromptTextSubclass): @classmethod def from_common( cls, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py index 01ab8454a..8176d7862 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py @@ -26,20 +26,6 @@ class NDJsonConverter: - @staticmethod - def deserialize(json_data: Iterable[Dict[str, Any]]) -> LabelGenerator: - """ - Converts ndjson data (prediction import format) into the common labelbox format. - - Args: - json_data: An iterable representing the ndjson data - Returns: - LabelGenerator containing the ndjson data. - """ - data = NDLabel(**{"annotations": copy.copy(json_data)}) - res = data.to_common() - return res - @staticmethod def serialize( labels: LabelCollection, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 18134a228..7039ae834 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -46,7 +46,6 @@ from .relationship import NDRelationship from .base import DataRow from pydantic import BaseModel, ValidationError -from .base import subclass_registry, _SubclassRegistryBase from pydantic_core import PydanticUndefined from contextlib import suppress @@ -67,68 +66,7 @@ class NDLabel(BaseModel): - annotations: List[_SubclassRegistryBase] - - def __init__(self, **kwargs): - # NOTE: Deserialization of subclasses in pydantic is difficult, see here https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83 - # Below implements the subclass registry as mentioned in the article. The python dicts we pass in can be missing certain fields, - # so we essentially have to infer the type against all subclasses that have the _SubclassRegistryBase inheritance. - # It works by checking whether the keys of each annotation match the required keys of any subclass. - # More keys are prioritized over fewer keys (closer match). This is used when importing json into our base models; not many customer workflows - # depend on this method, but it works for all our existing tests with the bonus of added validation (an error is thrown if no subclass is found). - - for index, annotation in enumerate(kwargs["annotations"]): - if isinstance(annotation, dict): - item_annotation_keys = annotation.keys() - key_subclass_combos = defaultdict(list) - for subclass in subclass_registry.values(): - # Get all required keys from subclass - annotation_keys = [] - for k, field in subclass.model_fields.items(): - if field.default == PydanticUndefined and k != "uuid": - if ( - hasattr(field, "alias") - and field.alias in item_annotation_keys - ): - annotation_keys.append(field.alias) - elif ( - hasattr(field, "validation_alias") - and field.validation_alias - in item_annotation_keys - ): - annotation_keys.append(field.validation_alias) - else: - annotation_keys.append(k) - - key_subclass_combos[subclass].extend(annotation_keys) - - # Sort by subclass that has the most keys, i.e. 
the one with the most matching keys is most likely our subclass - key_subclass_combos = dict( - sorted( - key_subclass_combos.items(), - key=lambda x: len(x[1]), - reverse=True, - ) - ) - - for subclass, key_subclass_combo in key_subclass_combos.items(): - # Choose the keys from our dict we supplied that match the required keys of a subclass - check_required_keys = all( - key in list(item_annotation_keys) - for key in key_subclass_combo - ) - if check_required_keys: - # Keep trying subclasses until we find one that has valid values (does not throw a validation error) - with suppress(ValidationError): - annotation = subclass(**annotation) - break - if isinstance(annotation, dict): - raise ValueError( - f"Could not find subclass for fields: {item_annotation_keys}" - ) - - kwargs["annotations"][index] = annotation - super().__init__(**kwargs) + annotations: AnnotationType class _Relationship(BaseModel): """This object holds information about the relationship""" diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py index 60d538b19..b28e575cf 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py @@ -15,7 +15,6 @@ ConfusionMatrixMetricConfidenceValue, ) from pydantic import ConfigDict, model_serializer -from .base import _SubclassRegistryBase class BaseNDMetric(NDJsonBase): @@ -33,7 +32,7 @@ def serialize_model(self, handler): return res -class NDConfusionMatrixMetric(BaseNDMetric, _SubclassRegistryBase): +class NDConfusionMatrixMetric(BaseNDMetric): metric_value: Union[ ConfusionMatrixMetricValue, ConfusionMatrixMetricConfidenceValue ] @@ -65,7 +64,7 @@ def from_common( ) -class NDScalarMetric(BaseNDMetric, _SubclassRegistryBase): +class NDScalarMetric(BaseNDMetric): metric_value: Union[ScalarMetricValue, ScalarMetricConfidenceValue] metric_name: Optional[str] = None aggregation: Optional[ScalarMetricAggregation] = ( diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py index 4be24f683..74d185f45 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py @@ -2,7 +2,7 @@ from labelbox.utils import _CamelCaseMixin -from .base import _SubclassRegistryBase, DataRow, NDAnnotation +from .base import DataRow, NDAnnotation from ...annotation_types.mmc import ( MessageSingleSelectionTask, MessageMultiSelectionTask, @@ -20,7 +20,7 @@ class MessageTaskData(_CamelCaseMixin): ] -class NDMessageTask(NDAnnotation, _SubclassRegistryBase): +class NDMessageTask(NDAnnotation): message_evaluation_task: MessageTaskData def to_common(self) -> MessageEvaluationTaskAnnotation: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py index a1465fa06..91abface6 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py @@ -52,7 +52,7 @@ NDSubclassification, NDSubclassificationType, ) -from .base import DataRow, NDAnnotation, NDJsonBase, _SubclassRegistryBase +from .base import DataRow, NDAnnotation, NDJsonBase from pydantic import BaseModel @@ -81,9 +81,7 @@ class Bbox(BaseModel): width: float -class NDPoint( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class 
NDPoint(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): point: _Point def to_common(self) -> Point: @@ -114,7 +112,7 @@ def from_common( ) -class NDFramePoint(VideoSupported, _SubclassRegistryBase): +class NDFramePoint(VideoSupported): point: _Point classifications: List[NDSubclassificationType] = [] @@ -148,9 +146,7 @@ def from_common( ) -class NDLine( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDLine(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): line: List[_Point] def to_common(self) -> Line: @@ -181,7 +177,7 @@ def from_common( ) -class NDFrameLine(VideoSupported, _SubclassRegistryBase): +class NDFrameLine(VideoSupported): line: List[_Point] classifications: List[NDSubclassificationType] = [] @@ -215,7 +211,7 @@ def from_common( ) -class NDDicomLine(NDFrameLine, _SubclassRegistryBase): +class NDDicomLine(NDFrameLine): def to_common( self, name: str, @@ -234,9 +230,7 @@ def to_common( ) -class NDPolygon( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDPolygon(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): polygon: List[_Point] def to_common(self) -> Polygon: @@ -267,9 +261,7 @@ def from_common( ) -class NDRectangle( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDRectangle(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): bbox: Bbox def to_common(self) -> Rectangle: @@ -313,7 +305,7 @@ def from_common( ) -class NDDocumentRectangle(NDRectangle, _SubclassRegistryBase): +class NDDocumentRectangle(NDRectangle): page: int unit: str @@ -360,7 +352,7 @@ def from_common( ) -class NDFrameRectangle(VideoSupported, _SubclassRegistryBase): +class NDFrameRectangle(VideoSupported): bbox: Bbox classifications: List[NDSubclassificationType] = [] @@ -496,7 +488,7 @@ def to_common( ] -class NDSegments(NDBaseObject, _SubclassRegistryBase): +class NDSegments(NDBaseObject): segments: List[NDSegment] def to_common(self, name: str, feature_schema_id: Cuid): @@ -532,7 +524,7 @@ def from_common( ) -class NDDicomSegments(NDBaseObject, DicomSupported, _SubclassRegistryBase): +class NDDicomSegments(NDBaseObject, DicomSupported): segments: List[NDDicomSegment] def to_common(self, name: str, feature_schema_id: Cuid): @@ -580,9 +572,7 @@ class _PNGMask(BaseModel): png: str -class NDMask( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDMask(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): mask: Union[_URIMask, _PNGMask] def to_common(self) -> Mask: @@ -646,7 +636,6 @@ class NDVideoMasks( NDJsonBase, ConfidenceMixin, CustomMetricsNotSupportedMixin, - _SubclassRegistryBase, ): masks: NDVideoMasksFramesInstances @@ -678,7 +667,7 @@ def from_common(cls, annotation, data): ) -class NDDicomMasks(NDVideoMasks, DicomSupported, _SubclassRegistryBase): +class NDDicomMasks(NDVideoMasks, DicomSupported): def to_common(self) -> DICOMMaskAnnotation: return DICOMMaskAnnotation( frames=self.masks.frames, @@ -702,9 +691,7 @@ class Location(BaseModel): end: int -class NDTextEntity( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDTextEntity(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): location: Location def to_common(self) -> TextEntity: @@ -738,9 +725,7 @@ def from_common( ) -class NDDocumentEntity( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDDocumentEntity(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): name: str text_selections: List[DocumentTextSelection] @@ 
-774,7 +759,7 @@ def from_common( ) -class NDConversationEntity(NDTextEntity, _SubclassRegistryBase): +class NDConversationEntity(NDTextEntity): message_id: str def to_common(self) -> ConversationEntity: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py index fbea7e477..94c8e9879 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py @@ -5,7 +5,7 @@ from ...annotation_types.relationship import RelationshipAnnotation from ...annotation_types.relationship import Relationship from .objects import NDObjectType -from .base import DataRow, _SubclassRegistryBase +from .base import DataRow SUPPORTED_ANNOTATIONS = NDObjectType @@ -16,7 +16,7 @@ class _Relationship(BaseModel): type: str -class NDRelationship(NDAnnotation, _SubclassRegistryBase): +class NDRelationship(NDAnnotation): relationship: _Relationship @staticmethod diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index 03327e0d1..d6b74de68 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,5 +1,4 @@ import labelbox.schema.asset_attachment -import labelbox.schema.bulk_import_request import labelbox.schema.annotation_import import labelbox.schema.benchmark import labelbox.schema.data_row diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 88153e48f..f2de4db5e 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -1079,8 +1079,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Batch was not created successfully: " - + json.dumps(task.errors) + "Batch was not created successfully: " + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) diff --git a/libs/labelbox/tests/data/annotation_import/test_data_types.py b/libs/labelbox/tests/data/annotation_import/test_data_types.py deleted file mode 100644 index 1e45295ef..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_data_types.py +++ /dev/null @@ -1,83 +0,0 @@ -import pytest - -from labelbox.data.annotation_types.data import ( - AudioData, - ConversationData, - DocumentData, - HTMLData, - ImageData, - TextData, -) -from labelbox.data.serialization import NDJsonConverter -from labelbox.data.annotation_types.data.video import VideoData - -import labelbox.types as lb_types -from labelbox.schema.media_type import MediaType - -# Unit test for label based on data type. -# TODO: Dicom was removed; it is unstable when you deserialize and serialize on label import. If we intend to keep this library, generic data type tests need to be added for this data type. 
-# TODO: add MediaType.LLMPromptResponseCreation(data gen) once supported and llm human preference once media type is added - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, AudioData), - (MediaType.Html, HTMLData), - (MediaType.Image, ImageData), - (MediaType.Text, TextData), - (MediaType.Video, VideoData), - (MediaType.Conversational, ConversationData), - (MediaType.Document, DocumentData), - ], -) -def test_data_row_type_by_data_row_id( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_datarow_id, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = lb_types.Label( - data=data_type_class(uid=hardcoded_datarow_id()), - annotations=label.annotations, - ) - - assert data_label.data.uid == label.data.uid - assert label.annotations == data_label.annotations - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, AudioData), - (MediaType.Html, HTMLData), - (MediaType.Image, ImageData), - (MediaType.Text, TextData), - (MediaType.Video, VideoData), - (MediaType.Conversational, ConversationData), - (MediaType.Document, DocumentData), - ], -) -def test_data_row_type_by_global_key( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_global_key, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = lb_types.Label( - data=data_type_class(global_key=hardcoded_global_key()), - annotations=label.annotations, - ) - - assert data_label.data.global_key == label.data.global_key - assert label.annotations == data_label.annotations diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py index 9de67bd4e..3fc6cddf6 100644 --- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py +++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py @@ -28,78 +28,6 @@ def validate_iso_format(date_string: str): assert parsed_t.second is not None -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, GenericDataRowData), - (MediaType.Html, GenericDataRowData), - (MediaType.Image, GenericDataRowData), - (MediaType.Text, GenericDataRowData), - (MediaType.Video, GenericDataRowData), - (MediaType.Conversational, GenericDataRowData), - (MediaType.Document, GenericDataRowData), - (MediaType.LLMPromptResponseCreation, GenericDataRowData), - (MediaType.LLMPromptCreation, GenericDataRowData), - (OntologyKind.ResponseCreation, GenericDataRowData), - (OntologyKind.ModelEvaluation, GenericDataRowData), - ], -) -def test_generic_data_row_type_by_data_row_id( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_datarow_id, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = Label( - data=data_type_class(uid=hardcoded_datarow_id()), - annotations=label.annotations, - ) - - assert data_label.data.uid == label.data.uid - assert label.annotations == data_label.annotations - - -@pytest.mark.parametrize( - "media_type, data_type_class", - 
[ - (MediaType.Audio, GenericDataRowData), - (MediaType.Html, GenericDataRowData), - (MediaType.Image, GenericDataRowData), - (MediaType.Text, GenericDataRowData), - (MediaType.Video, GenericDataRowData), - (MediaType.Conversational, GenericDataRowData), - (MediaType.Document, GenericDataRowData), - # (MediaType.LLMPromptResponseCreation, GenericDataRowData), - # (MediaType.LLMPromptCreation, GenericDataRowData), - (OntologyKind.ResponseCreation, GenericDataRowData), - (OntologyKind.ModelEvaluation, GenericDataRowData), - ], -) -def test_generic_data_row_type_by_global_key( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_global_key, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = Label( - data=data_type_class(global_key=hardcoded_global_key()), - annotations=label.annotations, - ) - - assert data_label.data.global_key == label.data.global_key - assert label.annotations == data_label.annotations - - @pytest.mark.parametrize( "configured_project, media_type", [ diff --git a/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py b/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py index fccca2a3f..5f47975ad 100644 --- a/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py +++ b/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py @@ -1,5 +1,19 @@ import uuid from labelbox import parser +from labelbox.data.annotation_types.annotation import ObjectAnnotation +from labelbox.data.annotation_types.classification.classification import ( + ClassificationAnnotation, + ClassificationAnswer, + Radio, +) +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.annotation_types.geometry.line import Line +from labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.polygon import Polygon +from labelbox.data.annotation_types.geometry.rectangle import Rectangle +from labelbox.data.annotation_types.label import Label import pytest from labelbox import ModelRun @@ -193,14 +207,60 @@ def test_create_from_label_objects( annotation_import_test_helpers, ): name = str(uuid.uuid4()) - use_data_row_ids = [ + use_data_row_id = [ p["dataRow"]["id"] for p in object_predictions_for_annotation_import ] - model_run_with_data_rows.upsert_data_rows(use_data_row_ids) - predictions = list( - NDJsonConverter.deserialize(object_predictions_for_annotation_import) - ) + model_run_with_data_rows.upsert_data_rows(use_data_row_id) + + predictions = [] + for data_row_id in use_data_row_id: + predictions.append( + Label( + data=GenericDataRowData( + uid=data_row_id, + ), + annotations=[ + ObjectAnnotation( + name="polygon", + extra={ + "uuid": "6d10fa30-3ea0-4e6c-bbb1-63f5c29fe3e4", + }, + value=Polygon( + points=[ + Point(x=147.692, y=118.154), + Point(x=142.769, y=104.923), + Point(x=57.846, y=118.769), + Point(x=28.308, y=169.846), + Point(x=147.692, y=118.154), + ], + ), + ), + ObjectAnnotation( + name="bbox", + extra={ + "uuid": "15b7138f-4bbc-42c5-ae79-45d87b0a3b2a", + }, + value=Rectangle( + start=Point(x=58.0, y=48.0), + end=Point(x=70.0, y=113.0), + ), + ), + ObjectAnnotation( + name="polyline", + extra={ + "uuid": "cf4c6df9-c39c-4fbc-9541-470f6622978a", + }, + value=Line( + points=[ + Point(x=147.692, y=118.154), + Point(x=150.692, 
y=160.154), + ], + ), + ), + ], + ), + ) annotation_import = model_run_with_data_rows.add_predictions( name=name, predictions=predictions diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py deleted file mode 100644 index 0ec742333..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ /dev/null @@ -1,194 +0,0 @@ -from labelbox.schema.media_type import MediaType -import pytest - -from pytest_cases import parametrize, fixture_ref - -from labelbox.exceptions import MALValidationError -from labelbox.schema.bulk_import_request import ( - NDChecklist, - NDClassification, - NDMask, - NDPolygon, - NDPolyline, - NDRectangle, - NDText, - NDTextEntity, - NDTool, - _validate_ndjson, -) - -""" -- These ND labels are a part of BulkImportRequest and should be removed once bulk import request is removed -""" - - -def test_classification_construction(checklist_inference, text_inference): - checklist = NDClassification.build(checklist_inference[0]) - assert isinstance(checklist, NDChecklist) - text = NDClassification.build(text_inference[0]) - assert isinstance(text, NDText) - - -@parametrize( - "inference, expected_type", - [ - (fixture_ref("polygon_inference"), NDPolygon), - (fixture_ref("rectangle_inference"), NDRectangle), - (fixture_ref("line_inference"), NDPolyline), - (fixture_ref("entity_inference"), NDTextEntity), - (fixture_ref("segmentation_inference"), NDMask), - (fixture_ref("segmentation_inference_rle"), NDMask), - (fixture_ref("segmentation_inference_png"), NDMask), - ], -) -def test_tool_construction(inference, expected_type): - assert isinstance(NDTool.build(inference[0]), expected_type) - - -def no_tool(text_inference, module_project): - pred = text_inference[0].copy() - # Missing key - del pred["answer"] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -@pytest.mark.parametrize("configured_project", [MediaType.Text], indirect=True) -def test_invalid_text(text_inference, configured_project): - # and if it is not a string - pred = text_inference[0].copy() - # Extra and wrong key - del pred["answer"] - pred["answers"] = [] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) - del pred["answers"] - - # Invalid type - pred["answer"] = [] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) - - # Invalid type - pred["answer"] = None - with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) - - -def test_invalid_checklist_item(checklist_inference, module_project): - # Duplicate answers - pred = checklist_inference[0].copy() - pred["answers"] = [pred["answers"][0], pred["answers"][0]] - # Duplicate schema ids - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - pred["answers"] = [{"name": "asdfg"}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - pred["answers"] = [{"schemaId": "1232132132"}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - pred["answers"] = [{}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - pred["answers"] = [] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - del pred["answers"] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -def test_invalid_polygon(polygon_inference, 
module_project): - # Only two points - pred = polygon_inference[0].copy() - pred["polygon"] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -@pytest.mark.parametrize("configured_project", [MediaType.Text], indirect=True) -def test_incorrect_entity(entity_inference, configured_project): - entity = entity_inference[0].copy() - # Location cannot be a list - entity["location"] = [0, 10] - with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) - - entity["location"] = {"start": -1, "end": 5} - with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) - - entity["location"] = {"start": 15, "end": 5} - with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) - - -@pytest.mark.skip( - "Test won't work/fails randomly since projects have to have a media type and could be missing features from the prediction list" -) -def test_all_validate_json(module_project, predictions): - # Predictions contains one of each type of prediction. - # These should be properly formatted and pass. - _validate_ndjson(predictions[0], module_project) - - -def test_incorrect_line(line_inference, module_project): - line = line_inference[0].copy() - line["line"] = [line["line"][0]] # Just one point - with pytest.raises(MALValidationError): - _validate_ndjson([line], module_project) - - -def test_incorrect_rectangle(rectangle_inference, module_project): - del rectangle_inference[0]["bbox"]["top"] - with pytest.raises(MALValidationError): - _validate_ndjson([rectangle_inference], module_project) - - -def test_duplicate_tools(rectangle_inference, module_project): - pred = rectangle_inference[0].copy() - pred["polygon"] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -def test_invalid_feature_schema(module_project, rectangle_inference): - pred = rectangle_inference[0].copy() - pred["schemaId"] = "blahblah" - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -def test_name_only_feature_schema(module_project, rectangle_inference): - pred = rectangle_inference[0].copy() - _validate_ndjson([pred], module_project) - - -def test_schema_id_only_feature_schema(module_project, rectangle_inference): - pred = rectangle_inference[0].copy() - del pred["name"] - ontology = module_project.ontology().normalized["tools"] - for tool in ontology: - if tool["name"] == "bbox": - feature_schema_id = tool["featureSchemaId"] - pred["schemaId"] = feature_schema_id - _validate_ndjson([pred], module_project) - - -def test_missing_feature_schema(module_project, rectangle_inference): - pred = rectangle_inference[0].copy() - del pred["name"] - with pytest.raises(MALValidationError): - _validate_ndjson([pred], module_project) - - -@pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) -def test_video_upload(video_checklist_inference, configured_project): - pred = video_checklist_inference[0].copy() - _validate_ndjson([pred], configured_project) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py b/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py new file mode 100644 index 000000000..0dc4c21c0 --- /dev/null +++ b/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py @@ -0,0 +1,79 @@ +from labelbox.data.annotation_types.data.generic_data_row_data import 
( + GenericDataRowData, +) +from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ClassificationAnnotation, Text + + +def test_generic_data_row_global_key(): + label_1 = Label( + data=GenericDataRowData(global_key="test"), + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + label_2 = Label( + data={"global_key": "test"}, + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + + expected_result = [ + { + "answer": "sample text", + "dataRow": {"globalKey": "test"}, + "name": "free_text", + "uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0", + } + ] + assert ( + list(NDJsonConverter.serialize([label_1])) + == list(NDJsonConverter.serialize([label_2])) + == expected_result + ) + + +def test_generic_data_row_id(): + label_1 = Label( + data=GenericDataRowData(uid="test"), + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + label_2 = Label( + data={"uid": "test"}, + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + + expected_result = [ + { + "answer": "sample text", + "dataRow": {"id": "test"}, + "name": "free_text", + "uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0", + } + ] + assert ( + list(NDJsonConverter.serialize([label_1])) + == list(NDJsonConverter.serialize([label_2])) + == expected_result + ) From 5fc6ff3be56536b5e672a96071930916e047816c Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 Sep 2024 21:51:13 -0500 Subject: [PATCH 11/44] [PLT-1488] Removed coco (#1820) --- .../labelbox/data/serialization/__init__.py | 1 - .../data/serialization/coco/__init__.py | 1 - .../data/serialization/coco/annotation.py | 78 ----- .../data/serialization/coco/categories.py | 17 -- .../data/serialization/coco/converter.py | 170 ----------- .../labelbox/data/serialization/coco/image.py | 52 ---- .../serialization/coco/instance_dataset.py | 266 ------------------ .../serialization/coco/panoptic_dataset.py | 242 ---------------- .../labelbox/data/serialization/coco/path.py | 9 - .../data/serialization/coco/test_coco.py | 38 --- 10 files changed, 874 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/__init__.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/annotation.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/categories.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/converter.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/image.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/path.py delete mode 100644 libs/labelbox/tests/data/serialization/coco/test_coco.py diff --git a/libs/labelbox/src/labelbox/data/serialization/__init__.py b/libs/labelbox/src/labelbox/data/serialization/__init__.py index 71a9b3443..38cb5edff 100644 --- a/libs/labelbox/src/labelbox/data/serialization/__init__.py +++ 
b/libs/labelbox/src/labelbox/data/serialization/__init__.py @@ -1,2 +1 @@ from .ndjson import NDJsonConverter -from .coco import COCOConverter diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py b/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py deleted file mode 100644 index 4511e89ee..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .converter import COCOConverter diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py b/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py deleted file mode 100644 index e387cb7d9..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py +++ /dev/null @@ -1,78 +0,0 @@ -from typing import Any, Tuple, List, Union -from pathlib import Path -from collections import defaultdict -import warnings - -from ...annotation_types.relationship import RelationshipAnnotation -from ...annotation_types.metrics.confusion_matrix import ConfusionMatrixMetric -from ...annotation_types.metrics.scalar import ScalarMetric -from ...annotation_types.video import VideoMaskAnnotation -from ...annotation_types.annotation import ObjectAnnotation -from ...annotation_types.classification.classification import ( - ClassificationAnnotation, -) - -import numpy as np - -from .path import PathSerializerMixin -from pydantic import BaseModel - - -def rle_decoding(rle_arr: List[int], w: int, h: int) -> np.ndarray: - indices = [] - for idx, cnt in zip(rle_arr[0::2], rle_arr[1::2]): - indices.extend( - list(range(idx - 1, idx + cnt - 1)) - ) # RLE is 1-based index - mask = np.zeros(h * w, dtype=np.uint8) - mask[indices] = 1 - return mask.reshape((w, h)).T - - -def get_annotation_lookup(annotations): - """Get annotations from Label.annotations objects - - Args: - annotations (Label.annotations): Annotations attached to labelbox Label object used as private method - """ - annotation_lookup = defaultdict(list) - for annotation in annotations: - # Provide a default value of None if the attribute doesn't exist - attribute_value = getattr(annotation, "image_id", None) or getattr( - annotation, "name", None - ) - annotation_lookup[attribute_value].append(annotation) - return annotation_lookup - - -class SegmentInfo(BaseModel): - id: int - category_id: int - area: Union[float, int] - bbox: Tuple[float, float, float, float] # [x,y,w,h], - iscrowd: int = 0 - - -class RLE(BaseModel): - counts: List[int] - size: Tuple[int, int] # h,w or w,h? - - -class COCOObjectAnnotation(BaseModel): - # All segmentations for a particular class in an image... - # So each image will have one of these for each class present in the image.. - # Annotations only exist if there is data.. 
- id: int - image_id: int - category_id: int - segmentation: Union[RLE, List[List[float]]] # [[x1,y1,x2,y2,x3,y3...]] - area: float - bbox: Tuple[float, float, float, float] # [x,y,w,h], - iscrowd: int = 0 - - -class PanopticAnnotation(PathSerializerMixin): - # One to one relationship between image and panoptic annotation - image_id: int - file_name: Path - segments_info: List[SegmentInfo] diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/categories.py b/libs/labelbox/src/labelbox/data/serialization/coco/categories.py deleted file mode 100644 index 60ba30fce..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/categories.py +++ /dev/null @@ -1,17 +0,0 @@ -import sys -from hashlib import md5 - -from pydantic import BaseModel - - -class Categories(BaseModel): - id: int - name: str - supercategory: str - isthing: int = 1 - - -def hash_category_name(name: str) -> int: - return int.from_bytes( - md5(name.encode("utf-8")).hexdigest().encode("utf-8"), "little" - ) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/converter.py b/libs/labelbox/src/labelbox/data/serialization/coco/converter.py deleted file mode 100644 index e270b7573..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/converter.py +++ /dev/null @@ -1,170 +0,0 @@ -from typing import Dict, Any, Union -from pathlib import Path -import os -import warnings - -from ...annotation_types.collection import LabelCollection, LabelGenerator -from ...serialization.coco.instance_dataset import CocoInstanceDataset -from ...serialization.coco.panoptic_dataset import CocoPanopticDataset - - -def create_path_if_not_exists( - path: Union[Path, str], ignore_existing_data=False -): - path = Path(path) - if not path.exists(): - path.mkdir(parents=True, exist_ok=True) - elif not ignore_existing_data and os.listdir(path): - raise ValueError( - f"Directory `{path}` must be empty, or set `ignore_existing_data=True`" - ) - return path - - -def validate_path(path: Union[Path, str], name: str): - path = Path(path) - if not path.exists(): - raise ValueError(f"{name} `{path}` must exist") - return path - - -class COCOConverter: - """ - Class for converting between COCO and Labelbox formats. - Note that this class is only compatible with image data. - - Subclasses are currently ignored. - To use subclasses, manually flatten them before using the converter. - """ - - @staticmethod - def serialize_instances( - labels: LabelCollection, - image_root: Union[Path, str], - ignore_existing_data=False, - max_workers=8, - ) -> Dict[str, Any]: - """ - Convert a Labelbox LabelCollection into an MS COCO dataset. - This function will only convert masks, polygons, and rectangles. - Masks will be converted into individual instances. - Use serialize_panoptic to prevent masks from being split apart. - - Args: - labels: A collection of labels to convert - image_root: Where to save images to - ignore_existing_data: Whether or not to raise an exception if images already exist. - This exists only to support Detectron's panoptic FPN model, which requires two MS COCO payloads for the same images. - max_workers: Number of workers to process the dataset with. A value of 0 will process all data in the main process. - Returns: - A dictionary containing labels in the COCO object format. - """ - - warnings.warn( - "You are currently utilizing COCOConverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = create_path_if_not_exists(image_root, ignore_existing_data) - return CocoInstanceDataset.from_common( - labels=labels, image_root=image_root, max_workers=max_workers - ).model_dump() - - @staticmethod - def serialize_panoptic( - labels: LabelCollection, - image_root: Union[Path, str], - mask_root: Union[Path, str], - all_stuff: bool = False, - ignore_existing_data=False, - max_workers: int = 8, - ) -> Dict[str, Any]: - """ - Convert a Labelbox LabelCollection into an MS COCO dataset. - This function will only convert masks, polygons, and rectangles. - Masks are kept whole rather than split into individual instances. - Use serialize_instances if masks should be split into instances instead. - - Args: - labels: A collection of labels to convert - image_root: Where to save images to - mask_root: Where to save segmentation masks to - all_stuff: If rectangle or polygon annotations are encountered, they will be treated as instances. - To convert them to the stuff class, set `all_stuff=True`. - ignore_existing_data: Whether or not to raise an exception if images already exist. - This exists only to support Detectron's panoptic FPN model, which requires two MS COCO payloads for the same images. - max_workers: Number of workers to process the dataset with. A value of 0 will process all data in the main process. - Returns: - A dictionary containing labels in the COCO panoptic format. - """ - - warnings.warn( - "You are currently utilizing COCOConverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = create_path_if_not_exists(image_root, ignore_existing_data) - mask_root = create_path_if_not_exists(mask_root, ignore_existing_data) - return CocoPanopticDataset.from_common( - labels=labels, - image_root=image_root, - mask_root=mask_root, - all_stuff=all_stuff, - max_workers=max_workers, - ).model_dump() - - @staticmethod - def deserialize_panoptic( - json_data: Dict[str, Any], - image_root: Union[Path, str], - mask_root: Union[Path, str], - ) -> LabelGenerator: - """ - Convert COCO panoptic data into the Labelbox format (as a LabelGenerator). - - Args: - json_data: panoptic data as a dict - image_root: Path to local images that are referenced by the panoptic json - mask_root: Path to local segmentation masks that are referenced by the panoptic json - Returns: - LabelGenerator - """ - - warnings.warn( - "You are currently utilizing COCOConverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = validate_path(image_root, "image_root") - mask_root = validate_path(mask_root, "mask_root") - objs = CocoPanopticDataset(**json_data) - gen = objs.to_common(image_root, mask_root) - return LabelGenerator(data=gen) - - @staticmethod - def deserialize_instances( - json_data: Dict[str, Any], image_root: Path - ) -> LabelGenerator: - """ - Convert COCO object data into the Labelbox format (as a LabelGenerator). - - Args: - json_data: COCO object data as a dict - image_root: Path to local images that are referenced by the COCO object json - Returns: - LabelGenerator - """ - - warnings.warn( - "You are currently utilizing COCOConverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = validate_path(image_root, "image_root") - objs = CocoInstanceDataset(**json_data) - gen = objs.to_common(image_root) - return LabelGenerator(data=gen) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/image.py b/libs/labelbox/src/labelbox/data/serialization/coco/image.py deleted file mode 100644 index cef173377..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/image.py +++ /dev/null @@ -1,52 +0,0 @@ -from pathlib import Path - -from typing import Optional, Tuple -from PIL import Image -import imagesize - -from .path import PathSerializerMixin -from ...annotation_types import Label - - -class CocoImage(PathSerializerMixin): - id: int - width: int - height: int - file_name: Path - license: Optional[int] = None - flickr_url: Optional[str] = None - coco_url: Optional[str] = None - - -def get_image_id(label: Label, idx: int) -> int: - if label.data.file_path is not None: - file_name = label.data.file_path.replace(".jpg", "") - if file_name.isdecimal(): - return file_name - return idx - - -def get_image(label: Label, image_root: Path, image_id: str) -> CocoImage: - path = Path(image_root, f"{image_id}.jpg") - if not path.exists(): - im = Image.fromarray(label.data.value) - im.save(path) - w, h = im.size - else: - w, h = imagesize.get(str(path)) - return CocoImage(id=image_id, width=w, height=h, file_name=Path(path.name)) - - -def id_to_rgb(id: int) -> Tuple[int, int, int]: - digits = [] - for _ in range(3): - digits.append(id % 256) - id //= 256 - return digits - - -def rgb_to_id(red: int, green: int, blue: int) -> int: - id = blue * 256 * 256 - id += green * 256 - id += red - return id diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py b/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py deleted file mode 100644 index 5241e596f..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py +++ /dev/null @@ -1,266 +0,0 @@ -# https://cocodataset.org/#format-data - -from concurrent.futures import ProcessPoolExecutor, as_completed -from typing import Any, Dict, List, Tuple, Optional -from pathlib import Path - -import numpy as np -from tqdm import tqdm - -from ...annotation_types import ( - ImageData, - MaskData, - Mask, - ObjectAnnotation, - Label, - Polygon, - Point, - Rectangle, -) -from ...annotation_types.collection import LabelCollection -from .categories import Categories, hash_category_name -from .annotation import ( - COCOObjectAnnotation, - RLE, - get_annotation_lookup, - rle_decoding, -) -from .image import CocoImage, get_image, get_image_id -from pydantic import BaseModel - - -def mask_to_coco_object_annotation( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> Optional[COCOObjectAnnotation]: - # This is going to fill in any holes in the multipolygon - # If you need to support holes use the panoptic data format - shapely = annotation.value.shapely.simplify(1).buffer(0) - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - # Iterate over the polygon once, or over each polygon in a multipolygon - area = shapely.area - - return COCOObjectAnnotation( - id=annot_idx, -
image_id=image_id, - category_id=category_id, - segmentation=[ - np.array(s.exterior.coords).ravel().tolist() - for s in ([shapely] if shapely.type == "Polygon" else shapely.geoms) - ], - area=area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - iscrowd=0, - ) - - -def vector_to_coco_object_annotation( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> COCOObjectAnnotation: - shapely = annotation.value.shapely - xmin, ymin, xmax, ymax = shapely.bounds - segmentation = [] - if isinstance(annotation.value, Polygon): - for point in annotation.value.points: - segmentation.extend([point.x, point.y]) - else: - box = annotation.value - segmentation.extend( - [ - box.start.x, - box.start.y, - box.end.x, - box.start.y, - box.end.x, - box.end.y, - box.start.x, - box.end.y, - ] - ) - - return COCOObjectAnnotation( - id=annot_idx, - image_id=image_id, - category_id=category_id, - segmentation=[segmentation], - area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - iscrowd=0, - ) - - -def rle_to_common( - class_annotations: COCOObjectAnnotation, class_name: str -) -> ObjectAnnotation: - mask = rle_decoding( - class_annotations.segmentation.counts, - *class_annotations.segmentation.size[::-1], - ) - return ObjectAnnotation( - name=class_name, - value=Mask(mask=MaskData.from_2D_arr(mask), color=[1, 1, 1]), - ) - - -def segmentations_to_common( - class_annotations: COCOObjectAnnotation, class_name: str -) -> List[ObjectAnnotation]: - # Technically it is polygons. But the key in coco is called segmentations.. - annotations = [] - for points in class_annotations.segmentation: - annotations.append( - ObjectAnnotation( - name=class_name, - value=Polygon( - points=[ - Point(x=points[i], y=points[i + 1]) - for i in range(0, len(points), 2) - ] - ), - ) - ) - return annotations - - -def object_annotation_to_coco( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> Optional[COCOObjectAnnotation]: - if isinstance(annotation.value, Mask): - return mask_to_coco_object_annotation( - annotation, annot_idx, image_id, category_id - ) - elif isinstance(annotation.value, (Polygon, Rectangle)): - return vector_to_coco_object_annotation( - annotation, annot_idx, image_id, category_id - ) - else: - return None - - -def process_label( - label: Label, idx: int, image_root: str, max_annotations_per_image=10000 -) -> Tuple[np.ndarray, List[COCOObjectAnnotation], Dict[str, str]]: - annot_idx = idx * max_annotations_per_image - image_id = get_image_id(label, idx) - image = get_image(label, image_root, image_id) - coco_annotations = [] - annotation_lookup = get_annotation_lookup(label.annotations) - categories = {} - for class_name in annotation_lookup: - for annotation in annotation_lookup[class_name]: - category_id = categories.get(annotation.name) or hash_category_name( - annotation.name - ) - coco_annotation = object_annotation_to_coco( - annotation, annot_idx, image_id, category_id - ) - if coco_annotation is not None: - coco_annotations.append(coco_annotation) - if annotation.name not in categories: - categories[annotation.name] = category_id - annot_idx += 1 - - return image, coco_annotations, categories - - -class CocoInstanceDataset(BaseModel): - info: Dict[str, Any] = {} - images: List[CocoImage] - annotations: List[COCOObjectAnnotation] - categories: List[Categories] - - @classmethod - def from_common( - cls, labels: LabelCollection, image_root: Path, max_workers=8 - ): - all_coco_annotations = [] - categories = {} - images 
= [] - futures = [] - coco_categories = {} - - if max_workers: - with ProcessPoolExecutor(max_workers=max_workers) as exc: - futures = [ - exc.submit(process_label, label, idx, image_root) - for idx, label in enumerate(labels) - ] - results = [ - future.result() for future in tqdm(as_completed(futures)) - ] - else: - results = [ - process_label(label, idx, image_root) - for idx, label in enumerate(labels) - ] - - for result in results: - images.append(result[0]) - all_coco_annotations.extend(result[1]) - coco_categories.update(result[2]) - - category_mapping = { - category_id: idx + 1 - for idx, category_id in enumerate(coco_categories.values()) - } - categories = [ - Categories( - id=category_mapping[idx], - name=name, - supercategory="all", - isthing=1, - ) - for name, idx in coco_categories.items() - ] - for annot in all_coco_annotations: - annot.category_id = category_mapping[annot.category_id] - - return CocoInstanceDataset( - info={"image_root": image_root}, - images=images, - annotations=all_coco_annotations, - categories=categories, - ) - - def to_common(self, image_root): - category_lookup = { - category.id: category for category in self.categories - } - annotation_lookup = get_annotation_lookup(self.annotations) - - for image in self.images: - im_path = Path(image_root, image.file_name) - if not im_path.exists(): - raise ValueError( - f"Cannot find file {im_path}. Make sure `image_root` is set properly" - ) - - data = ImageData(file_path=str(im_path)) - annotations = [] - for class_annotations in annotation_lookup[image.id]: - if isinstance(class_annotations.segmentation, RLE): - annotations.append( - rle_to_common( - class_annotations, - category_lookup[class_annotations.category_id].name, - ) - ) - elif isinstance(class_annotations.segmentation, list): - annotations.extend( - segmentations_to_common( - class_annotations, - category_lookup[class_annotations.category_id].name, - ) - ) - yield Label(data=data, annotations=annotations) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py b/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py deleted file mode 100644 index cbb410548..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py +++ /dev/null @@ -1,242 +0,0 @@ -from concurrent.futures import ProcessPoolExecutor, as_completed -from typing import Dict, Any, List, Union -from pathlib import Path - -from tqdm import tqdm -import numpy as np -from PIL import Image - -from ...annotation_types.geometry import Polygon, Rectangle -from ...annotation_types import Label -from ...annotation_types.geometry.mask import Mask -from ...annotation_types.annotation import ObjectAnnotation -from ...annotation_types.data.raster import MaskData, ImageData -from ...annotation_types.collection import LabelCollection -from .categories import Categories, hash_category_name -from .image import CocoImage, get_image, get_image_id, id_to_rgb -from .annotation import PanopticAnnotation, SegmentInfo, get_annotation_lookup -from pydantic import BaseModel - - -def vector_to_coco_segment_info( - canvas: np.ndarray, - annotation: ObjectAnnotation, - annotation_idx: int, - image: CocoImage, - category_id: int, -): - shapely = annotation.value.shapely - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - canvas = annotation.value.draw( - height=image.height, - width=image.width, - canvas=canvas, - color=id_to_rgb(annotation_idx), - ) - - return SegmentInfo( - id=annotation_idx, - category_id=category_id, - 
area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - ), canvas - - -def mask_to_coco_segment_info( - canvas: np.ndarray, annotation, annotation_idx: int, category_id -): - color = id_to_rgb(annotation_idx) - mask = annotation.value.draw(color=color) - shapely = annotation.value.shapely - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - canvas = np.where(canvas == (0, 0, 0), mask, canvas) - return SegmentInfo( - id=annotation_idx, - category_id=category_id, - area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - ), canvas - - -def process_label( - label: Label, idx: Union[int, str], image_root, mask_root, all_stuff=False -): - """ - Masks become stuff - Polygon and rectangle become thing - """ - annotations = get_annotation_lookup(label.annotations) - image_id = get_image_id(label, idx) - image = get_image(label, image_root, image_id) - canvas = np.zeros((image.height, image.width, 3)) - - segments = [] - categories = {} - is_thing = {} - - for class_idx, class_name in enumerate(annotations): - for annotation_idx, annotation in enumerate(annotations[class_name]): - categories[annotation.name] = hash_category_name(annotation.name) - if isinstance(annotation.value, Mask): - coco_segment_info = mask_to_coco_segment_info( - canvas, - annotation, - class_idx + 1, - categories[annotation.name], - ) - - if coco_segment_info is None: - # Filter out empty masks - continue - - segment, canvas = coco_segment_info - segments.append(segment) - is_thing[annotation.name] = 0 - - elif isinstance(annotation.value, (Polygon, Rectangle)): - coco_vector_info = vector_to_coco_segment_info( - canvas, - annotation, - annotation_idx=(class_idx if all_stuff else annotation_idx) - + 1, - image=image, - category_id=categories[annotation.name], - ) - - if coco_vector_info is None: - # Filter out empty annotations - continue - - segment, canvas = coco_vector_info - segments.append(segment) - is_thing[annotation.name] = 1 - int(all_stuff) - - mask_file = str(image.file_name).replace(".jpg", ".png") - mask_file = Path(mask_root, mask_file) - Image.fromarray(canvas.astype(np.uint8)).save(mask_file) - return ( - image, - PanopticAnnotation( - image_id=image_id, - file_name=Path(mask_file.name), - segments_info=segments, - ), - categories, - is_thing, - ) - - -class CocoPanopticDataset(BaseModel): - info: Dict[str, Any] = {} - images: List[CocoImage] - annotations: List[PanopticAnnotation] - categories: List[Categories] - - @classmethod - def from_common( - cls, - labels: LabelCollection, - image_root, - mask_root, - all_stuff, - max_workers=8, - ): - all_coco_annotations = [] - coco_categories = {} - coco_things = {} - images = [] - - if max_workers: - with ProcessPoolExecutor(max_workers=max_workers) as exc: - futures = [ - exc.submit( - process_label, - label, - idx, - image_root, - mask_root, - all_stuff, - ) - for idx, label in enumerate(labels) - ] - results = [ - future.result() for future in tqdm(as_completed(futures)) - ] - else: - results = [ - process_label(label, idx, image_root, mask_root, all_stuff) - for idx, label in enumerate(labels) - ] - - for result in results: - images.append(result[0]) - all_coco_annotations.append(result[1]) - coco_categories.update(result[2]) - coco_things.update(result[3]) - - category_mapping = { - category_id: idx + 1 - for idx, category_id in enumerate(coco_categories.values()) - } - categories = [ - Categories( - id=category_mapping[idx], - name=name, - supercategory="all", - isthing=coco_things.get(name, 1), - ) - for 
name, idx in coco_categories.items() - ] - - for annot in all_coco_annotations: - for segment in annot.segments_info: - segment.category_id = category_mapping[segment.category_id] - - return CocoPanopticDataset( - info={"image_root": image_root, "mask_root": mask_root}, - images=images, - annotations=all_coco_annotations, - categories=categories, - ) - - def to_common(self, image_root: Path, mask_root: Path): - category_lookup = { - category.id: category for category in self.categories - } - annotation_lookup = { - annotation.image_id: annotation for annotation in self.annotations - } - for image in self.images: - annotations = [] - annotation = annotation_lookup[image.id] - - im_path = Path(image_root, image.file_name) - if not im_path.exists(): - raise ValueError( - f"Cannot find file {im_path}. Make sure `image_root` is set properly" - ) - if not str(annotation.file_name).endswith(".png"): - raise ValueError( - f"COCO masks must be stored as png files and their extension must be `.png`. Found {annotation.file_name}" - ) - mask = MaskData( - file_path=str(Path(mask_root, annotation.file_name)) - ) - - for segmentation in annotation.segments_info: - category = category_lookup[segmentation.category_id] - annotations.append( - ObjectAnnotation( - name=category.name, - value=Mask(mask=mask, color=id_to_rgb(segmentation.id)), - ) - ) - data = ImageData(file_path=str(im_path)) - yield Label(data=data, annotations=annotations) - del annotation_lookup[image.id] diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/path.py b/libs/labelbox/src/labelbox/data/serialization/coco/path.py deleted file mode 100644 index c3be84f31..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/path.py +++ /dev/null @@ -1,9 +0,0 @@ -from pathlib import Path -from pydantic import BaseModel, model_serializer - - -class PathSerializerMixin(BaseModel): - @model_serializer(mode="wrap") - def serialize_model(self, handler): - res = handler(self) - return {k: str(v) if isinstance(v, Path) else v for k, v in res.items()} diff --git a/libs/labelbox/tests/data/serialization/coco/test_coco.py b/libs/labelbox/tests/data/serialization/coco/test_coco.py deleted file mode 100644 index a7c733ce5..000000000 --- a/libs/labelbox/tests/data/serialization/coco/test_coco.py +++ /dev/null @@ -1,38 +0,0 @@ -import json -from pathlib import Path - -from labelbox.data.serialization.coco import COCOConverter - -COCO_ASSETS_DIR = "tests/data/assets/coco" - - -def run_instances(tmpdir): - instance_json = json.load(open(Path(COCO_ASSETS_DIR, "instances.json"))) - res = COCOConverter.deserialize_instances( - instance_json, Path(COCO_ASSETS_DIR, "images") - ) - back = COCOConverter.serialize_instances( - res, - Path(tmpdir), - ) - - -def test_rle_objects(tmpdir): - rle_json = json.load(open(Path(COCO_ASSETS_DIR, "rle.json"))) - res = COCOConverter.deserialize_instances( - rle_json, Path(COCO_ASSETS_DIR, "images") - ) - back = COCOConverter.serialize_instances(res, tmpdir) - - -def test_panoptic(tmpdir): - panoptic_json = json.load(open(Path(COCO_ASSETS_DIR, "panoptic.json"))) - image_dir, mask_dir = [ - Path(COCO_ASSETS_DIR, dir_name) for dir_name in ["images", "masks"] - ] - res = COCOConverter.deserialize_panoptic(panoptic_json, image_dir, mask_dir) - back = COCOConverter.serialize_panoptic( - res, - Path(f"/{tmpdir}/images_panoptic"), - Path(f"/{tmpdir}/masks_panoptic"), - ) From 9cf28a10ef556c2a674b25c26fc8bf4f75006103 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 
Sep 2024 15:46:16 -0500 Subject: [PATCH 12/44] Fixed video --- libs/labelbox/tests/unit/test_label_data_type.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/libs/labelbox/tests/unit/test_label_data_type.py b/libs/labelbox/tests/unit/test_label_data_type.py index 7bc32e37c..662fa5a5a 100644 --- a/libs/labelbox/tests/unit/test_label_data_type.py +++ b/libs/labelbox/tests/unit/test_label_data_type.py @@ -1,11 +1,7 @@ -from email import message import pytest -from pydantic import ValidationError - from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) -from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.label import Label @@ -42,9 +38,9 @@ def test_video_data_type(): "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", } with pytest.warns(UserWarning, match="Use a dict"): - label = Label(data=VideoData(**data)) + label = Label(data=GenericDataRowData(**data)) data = label.data - assert isinstance(data, VideoData) + assert isinstance(data, GenericDataRowData) assert ( data.global_key == "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr" From 7fc10bb4678000ba6270086b46d06bb8057b6b50 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:51:12 -0500 Subject: [PATCH 13/44] Removed data type test --- libs/labelbox/tests/unit/test_label_data_type.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/libs/labelbox/tests/unit/test_label_data_type.py b/libs/labelbox/tests/unit/test_label_data_type.py index 662fa5a5a..611324f78 100644 --- a/libs/labelbox/tests/unit/test_label_data_type.py +++ b/libs/labelbox/tests/unit/test_label_data_type.py @@ -33,20 +33,6 @@ def test_generic_data_type_validations(): Label(data=data) -def test_video_data_type(): - data = { - "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", - } - with pytest.warns(UserWarning, match="Use a dict"): - label = Label(data=GenericDataRowData(**data)) - data = label.data - assert isinstance(data, GenericDataRowData) - assert ( - data.global_key - == "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr" - ) - - def test_generic_data_row(): data = { "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", From 0b810fbc939556e5724b726613167f5da1921992 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:42:52 -0500 Subject: [PATCH 14/44] Made fix --- .../labelbox/tests/data/annotation_types/test_collection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 8b2627776..f9917cf82 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -95,7 +95,7 @@ def test_adding_schema_ids(): def test_adding_urls(signer): label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[], ) uuid = str(uuid4()) @@ -108,7 +108,7 @@ def test_adding_urls(signer): def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( - data=GenericDataRowData("12345"), + 
data=GenericDataRowData(uid="12345"), annotations=[], ) uuid = str(uuid4()) @@ -123,7 +123,7 @@ def test_adding_to_dataset(signer): def test_adding_to_masks(signer): label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[ ObjectAnnotation( name="1234", From f8b8fa361b0c0996c62700ca3e5c75081c24d2fe Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:53:19 -0500 Subject: [PATCH 15/44] Fix list of labels --- libs/labelbox/tests/data/annotation_types/test_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index f9917cf82..57ba57962 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -20,7 +20,7 @@ @pytest.fixture def list_of_labels(): return [ - Label(data=GenericDataRowData(url="http://someurl")) for _ in range(5) + Label(data=GenericDataRowData(uid="http://someurl")) for _ in range(5) ] From 2329324b93014ef313bac436248fb37c6d96108b Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:08:03 -0500 Subject: [PATCH 16/44] Removed add url --- .../data/annotation_types/collection.py | 20 ------------------- .../labelbox/data/annotation_types/label.py | 13 ------------ .../data/annotation_types/test_collection.py | 14 ------------- 3 files changed, 47 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index d90204309..9eb1fe53e 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -40,26 +40,6 @@ def _assign_ids(label: Label): self._fns["assign_feature_schema_ids"] = _assign_ids return self - def add_url_to_data( - self, signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - LabelGenerator that signs urls as data is accessed - """ - - def _add_url_to_data(label: Label): - label.add_url_to_data(signer) - return label - - self._fns["add_url_to_data"] = _add_url_to_data - return self - def add_to_dataset( self, dataset: "Entity.Dataset", signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 9d5b92bdd..a18460bc1 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -90,19 +90,6 @@ def frame_annotations( frame_dict[annotation.frame].append(annotation) return frame_dict - def add_url_to_data(self, signer) -> "Label": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - Label with updated references to new data url - """ - self.data.create_url(signer) - return self - def add_url_to_masks(self, signer) -> "Label": """ Creates signed urls for all masks in the Label. 
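Note on what remains after this removal: `add_url_to_data` is gone from both `LabelGenerator` and `Label`, but `Label.add_url_to_masks` (whose docstring appears just above) is still the supported way to sign mask assets. A minimal sketch of that surviving path, assuming the labelbox SDK and numpy are installed; the signer stub, data row uid, and feature name below are illustrative placeholders, not values from this PR:

import numpy as np

from labelbox.types import (
    GenericDataRowData,
    Label,
    Mask,
    MaskData,
    ObjectAnnotation,
)


def signer(payload: bytes) -> str:
    # A real signer would upload `payload` and return its signed URL;
    # this stub just returns a fixed placeholder.
    return "https://example.com/signed-mask.png"


label = Label(
    data=GenericDataRowData(uid="your-data-row-uid"),  # placeholder uid
    annotations=[
        ObjectAnnotation(
            name="mask_feature",  # placeholder feature name
            value=Mask(
                mask=MaskData(arr=np.zeros((32, 32, 3), dtype=np.uint8)),
                color=[255, 255, 255],
            ),
        )
    ],
)

# Signs each mask's URL; masks that already have a URL are left untouched.
label.add_url_to_masks(signer)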
diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 57ba57962..17316f811 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -93,18 +93,6 @@ def test_adding_schema_ids(): assert next(generator).annotations[0].feature_schema_id == feature_schema_id -def test_adding_urls(signer): - label = Label( - data=GenericDataRowData(uid="12345"), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_url_to_data(signer(uuid)) - assert label.data.url != uuid - assert next(generator).data.url == uuid - assert label.data.url == uuid - - def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( @@ -113,9 +101,7 @@ def test_adding_to_dataset(signer): ) uuid = str(uuid4()) generator = LabelGenerator([label]).add_to_dataset(dataset, signer(uuid)) - assert label.data.url != uuid generated_label = next(generator) - assert generated_label.data.url == uuid assert generated_label.data.external_id != None assert generated_label.data.uid == dataset.uid assert label.data.url == uuid From 4457f25bd907c68d9cc2ee6c32eb3000b191dcf2 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:35:51 -0500 Subject: [PATCH 17/44] Removed rest of tests --- .../data/annotation_types/collection.py | 21 ----------------- .../data/annotation_types/data/test_raster.py | 13 +++++------ .../data/annotation_types/test_collection.py | 14 ----------- .../data/annotation_types/test_metrics.py | 15 ++++++++---- .../data/annotation_types/test_tiled_image.py | 23 ------------------- .../serialization/ndjson/test_conversation.py | 10 ++++---- .../data/serialization/ndjson/test_dicom.py | 14 ++++++----- .../serialization/ndjson/test_document.py | 2 +- .../serialization/ndjson/test_free_text.py | 6 ++--- .../data/serialization/ndjson/test_video.py | 15 ++++++------ 10 files changed, 41 insertions(+), 92 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index 9eb1fe53e..2e76176a8 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -40,27 +40,6 @@ def _assign_ids(label: Label): self._fns["assign_feature_schema_ids"] = _assign_ids return self - def add_to_dataset( - self, dataset: "Entity.Dataset", signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates data rows from each labels data object and attaches the data to the given dataset. - Updates the label's data object to have the same external_id and uid as the data row. - - Args: - dataset: labelbox dataset object to add the new data row to - signer: A function that accepts bytes and returns a signed url. 
- Returns: - LabelGenerator that updates references to the new data rows as data is accessed - """ - - def _add_to_dataset(label: Label): - label.create_data_row(dataset, signer) - return label - - self._fns["assign_datarow_ids"] = _add_to_dataset - return self - def add_url_to_masks( self, signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/tests/data/annotation_types/data/test_raster.py b/libs/labelbox/tests/data/annotation_types/data/test_raster.py index 6bc8f2bbf..304ed3e95 100644 --- a/libs/labelbox/tests/data/annotation_types/data/test_raster.py +++ b/libs/labelbox/tests/data/annotation_types/data/test_raster.py @@ -5,26 +5,26 @@ import pytest from PIL import Image -from labelbox.data.annotation_types.data import ImageData +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from pydantic import ValidationError def test_validate_schema(): with pytest.raises(ValidationError): - data = ImageData() + GenericDataRowData() def test_im_bytes(): data = (np.random.random((32, 32, 3)) * 255).astype(np.uint8) im_bytes = BytesIO() Image.fromarray(data).save(im_bytes, format="PNG") - raster_data = ImageData(im_bytes=im_bytes.getvalue()) + raster_data = MaskData(im_bytes=im_bytes.getvalue()) data_ = raster_data.value assert np.all(data == data_) def test_im_url(): - raster_data = ImageData(url="https://picsum.photos/id/829/200/300") + raster_data = GenericDataRowData(url="https://picsum.photos/id/829/200/300") data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -32,7 +32,7 @@ def test_im_url(): def test_im_path(): img_path = "/tmp/img.jpg" urllib.request.urlretrieve("https://picsum.photos/id/829/200/300", img_path) - raster_data = ImageData(file_path=img_path) + raster_data = GenericDataRowData(file_path=img_path) data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -42,8 +42,7 @@ def test_ref(): uid = "uid" metadata = [] media_attributes = {} - data = ImageData( - im_bytes=b"", + data = GenericDataRowData( external_id=external_id, uid=uid, metadata=metadata, diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 17316f811..f818b94ff 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -93,20 +93,6 @@ def test_adding_schema_ids(): assert next(generator).annotations[0].feature_schema_id == feature_schema_id -def test_adding_to_dataset(signer): - dataset = FakeDataset() - label = Label( - data=GenericDataRowData(uid="12345"), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_to_dataset(dataset, signer(uuid)) - generated_label = next(generator) - assert generated_label.data.external_id != None - assert generated_label.data.uid == dataset.uid - assert label.data.url == uuid - - def test_adding_to_masks(signer): label = Label( data=GenericDataRowData(uid="12345"), diff --git a/libs/labelbox/tests/data/annotation_types/test_metrics.py b/libs/labelbox/tests/data/annotation_types/test_metrics.py index 94c9521a5..4e9355573 100644 --- a/libs/labelbox/tests/data/annotation_types/test_metrics.py +++ b/libs/labelbox/tests/data/annotation_types/test_metrics.py @@ -8,7 +8,11 @@ ConfusionMatrixMetric, ScalarMetric, ) -from labelbox.data.annotation_types import ScalarMetric, Label, ImageData +from labelbox.data.annotation_types import ( + ScalarMetric, + Label, + GenericDataRowData, +) from 
labelbox.data.annotation_types.metrics.scalar import RESERVED_METRIC_NAMES from pydantic import ValidationError @@ -19,7 +23,8 @@ def test_legacy_scalar_metric(): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -72,7 +77,8 @@ def test_custom_scalar_metric(feature_name, subclass_name, aggregation, value): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -134,7 +140,8 @@ def test_custom_confusison_matrix_metric( assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { diff --git a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py index 46f2383d6..9b96c9445 100644 --- a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py +++ b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py @@ -6,8 +6,6 @@ from labelbox.data.annotation_types.data.tiled_image import ( EPSG, TiledBounds, - TileLayer, - TiledImageData, EPSGTransformer, ) from pydantic import ValidationError @@ -37,27 +35,6 @@ def test_tiled_bounds_same(epsg): ) -def test_create_tiled_image_data(): - bounds_points = [Point(x=0, y=0), Point(x=5, y=5)] - url = ( - "https://labelbox.s3-us-west-2.amazonaws.com/pathology/{z}/{x}/{y}.png" - ) - zoom_levels = (1, 10) - - tile_layer = TileLayer(url=url, name="slippy map tile") - tile_bounds = TiledBounds(epsg=EPSG.EPSG4326, bounds=bounds_points) - tiled_image_data = TiledImageData( - tile_layer=tile_layer, - tile_bounds=tile_bounds, - zoom_levels=zoom_levels, - version=2, - ) - assert isinstance(tiled_image_data, TiledImageData) - assert tiled_image_data.tile_bounds.bounds == bounds_points - assert tiled_image_data.tile_layer.url == url - assert tiled_image_data.zoom_levels == zoom_levels - - def test_epsg_point_projections(): zoom = 4 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 561f9ce86..5aa7285e2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -19,7 +19,7 @@ radio_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="radio", @@ -48,7 +48,7 @@ checklist_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="checklist", @@ -78,7 +78,7 @@ ] free_text_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -164,7 +164,7 @@ def test_conversation_entity_import_without_confidence(): def test_benchmark_reference_label_flag_enabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), 
+ data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -181,7 +181,7 @@ def test_benchmark_reference_label_flag_enabled(): def test_benchmark_reference_label_flag_disabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 762891aa2..6a00fa871 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -31,7 +31,7 @@ ] polyline_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), + data=lb_types.GenericDataRowData(uid="test-uid"), annotations=dicom_polyline_annotations, ) @@ -58,7 +58,7 @@ } polyline_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=dicom_polyline_annotations, ) @@ -109,11 +109,12 @@ } video_mask_label = lb_types.Label( - data=lb_types.VideoData(uid="test-uid"), annotations=[video_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[video_mask_annotation], ) video_mask_label_with_global_key = lb_types.Label( - data=lb_types.VideoData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[video_mask_annotation], ) """ @@ -128,11 +129,12 @@ ) dicom_mask_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), annotations=[dicom_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[dicom_mask_annotation], ) dicom_mask_label_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[dicom_mask_annotation], ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index a0897ad9f..fcdf4368b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -26,7 +26,7 @@ ) bbox_labels = [ lb_types.Label( - data=lb_types.DocumentData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[bbox_annotation], ) ] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 349be13a8..7b03a8447 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -5,7 +5,7 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -14,7 +14,7 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), @@ -38,7 +38,7 @@ def test_serialization(): def test_nested_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + 
data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 4fba5c2ca..6c14343a4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -6,11 +6,10 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.geometry.line import Line from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.geometry.rectangle import Rectangle -from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.video import ( @@ -28,7 +27,7 @@ def test_video(): labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( feature_schema_id="ckrb1sfjx099a0y914hl319ie", @@ -304,7 +303,7 @@ def test_video_name_only(): data = json.load(file) labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( name="question 1", @@ -574,7 +573,7 @@ def test_video_name_only(): def test_video_classification_global_subclassifications(): label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=[ @@ -790,7 +789,7 @@ def test_video_classification_nesting_bbox(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -940,7 +939,7 @@ def test_video_classification_point(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -1108,7 +1107,7 @@ def test_video_classification_frameline(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, From f5b6c7d33bff71659f125107cae86ac62b3a4434 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:15:43 -0500 Subject: [PATCH 18/44] Fix tests --- .../tests/data/annotation_types/test_label.py | 20 +++++++++++-------- .../test_export_video_streamable.py | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_label.py b/libs/labelbox/tests/data/annotation_types/test_label.py index 5bdfb6bde..8439837ed 100644 --- a/libs/labelbox/tests/data/annotation_types/test_label.py +++ b/libs/labelbox/tests/data/annotation_types/test_label.py @@ -17,7 +17,7 @@ ObjectAnnotation, Point, Line, - ImageData, + MaskData, Label, ) import pytest @@ -26,7 +26,9 @@ def test_schema_assignment_geometry(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -51,7 +53,7 @@ def test_schema_assignment_classification(): option_name = "my_option" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), 
annotations=[ ClassificationAnnotation( value=Radio(answer=ClassificationAnswer(name=option_name)), @@ -102,7 +104,7 @@ def test_schema_assignment_subclass(): value=Radio(answer=ClassificationAnswer(name=option_name)), ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -167,7 +169,9 @@ def test_highly_nested(): ], ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -230,7 +234,7 @@ def test_highly_nested(): def test_schema_assignment_confidence(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line( @@ -252,10 +256,10 @@ def test_initialize_label_no_coercion(): value=lb_types.ConversationEntity(start=0, end=8, message_id="4"), ) label = Label( - data=lb_types.ConversationData(global_key=global_key), + data=lb_types.GenericDataRowData(global_key=global_key), annotations=[ner_annotation], ) - assert isinstance(label.data, lb_types.ConversationData) + assert isinstance(label.data, lb_types.GenericDataRowData) assert label.data.global_key == global_key diff --git a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py index 115194a58..28ef6e0cf 100644 --- a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py +++ b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py @@ -4,7 +4,7 @@ import labelbox as lb import labelbox.types as lb_types -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.schema.annotation_import import AnnotationImportState from labelbox.schema.export_task import ExportTask, StreamType @@ -41,7 +41,7 @@ def test_export( for data_row_uid in data_row_uids: labels = [ lb_types.Label( - data=VideoData(uid=data_row_uid), + data=GenericDataRowData(uid=data_row_uid), annotations=bbox_video_annotation_objects, ) ] From ce60b24be86fa8f60b9aad0372757f96ee2007ed Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:16:05 -0500 Subject: [PATCH 19/44] Finish PR --- libs/labelbox/src/labelbox/data/annotation_types/label.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index a18460bc1..8ae05f898 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -3,9 +3,7 @@ import warnings import labelbox -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from labelbox.schema import ontology from ...annotated_types import Cuid @@ -19,7 +17,7 @@ from .video import VideoObjectAnnotation, VideoMaskAnnotation from .mmc import MessageEvaluationTaskAnnotation from ..ontology import get_feature_schema_lookup -from pydantic import 
BaseModel, field_validator, model_serializer +from pydantic import BaseModel, field_validator class Label(BaseModel): @@ -43,7 +41,7 @@ class Label(BaseModel): """ uid: Optional[Cuid] = None - data: GenericDataRowData + data: Union[GenericDataRowData, MaskData] annotations: List[ Union[ ClassificationAnnotation, From 12aa8c56aaa63ec49400d21c9432b2f057689455 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:44:14 -0500 Subject: [PATCH 20/44] Added back in tile data since some of its parameters are required --- .../data/annotation_types/data/tiled_image.py | 294 ++++++++++++++++++ .../data/annotation_types/test_tiled_image.py | 23 ++ 2 files changed, 317 insertions(+) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py index cdb7f4127..adb8db549 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py @@ -88,6 +88,300 @@ def validate_bounds_lat_lng(self): return self +class TileLayer(BaseModel): + """Url that contains the tile layer. Must be in the format: + + https://c.tile.openstreetmap.org/{z}/{x}/{y}.png + + >>> layer = TileLayer( + url="https://c.tile.openstreetmap.org/{z}/{x}/{y}.png", + name="slippy map tile" + ) + """ + + url: str + name: Optional[str] = "default" + + def asdict(self) -> Dict[str, str]: + return {"tileLayerUrl": self.url, "name": self.name} + + @field_validator("url") + def validate_url(cls, url): + xyz_format = "/{z}/{x}/{y}" + if xyz_format not in url: + raise ValueError(f"{url} needs to contain {xyz_format}") + return url + + +class TiledImageData(BaseData): + """Represents tiled imagery + + If specified version is 2, converts bounds from [lng,lat] to [lat,lng] + + Requires the following args: + tile_layer: TileLayer + tile_bounds: TiledBounds + zoom_levels: List[int] + Optional args: + max_native_zoom: int = None + tile_size: Optional[int] + version: int = 2 + alternative_layers: List[TileLayer] + + >>> tiled_image_data = TiledImageData(tile_layer=TileLayer, + tile_bounds=TiledBounds, + zoom_levels=[1, 12]) + """ + + tile_layer: TileLayer + tile_bounds: TiledBounds + alternative_layers: List[TileLayer] = [] + zoom_levels: Tuple[int, int] + max_native_zoom: Optional[int] = None + tile_size: Optional[int] = DEFAULT_TMS_TILE_SIZE + version: Optional[int] = 2 + multithread: bool = True + + def __post_init__(self) -> None: + if self.max_native_zoom is None: + self.max_native_zoom = self.zoom_levels[0] + + def asdict(self) -> Dict[str, str]: + return { + "tileLayerUrl": self.tile_layer.url, + "bounds": [ + [self.tile_bounds.bounds[0].x, self.tile_bounds.bounds[0].y], + [self.tile_bounds.bounds[1].x, self.tile_bounds.bounds[1].y], + ], + "minZoom": self.zoom_levels[0], + "maxZoom": self.zoom_levels[1], + "maxNativeZoom": self.max_native_zoom, + "epsg": self.tile_bounds.epsg.name, + "tileSize": self.tile_size, + "alternativeLayers": [ + layer.asdict() for layer in self.alternative_layers + ], + "version": self.version, + } + + def raster_data( + self, zoom: int = 0, max_tiles: int = 32, multithread=True + ) -> RasterData: + """Converts the tiled image asset into a RasterData object containing an + np.ndarray. + + Uses the minimum zoom provided to render the image. 
+ """ + if self.tile_bounds.epsg == EPSG.SIMPLEPIXEL: + xstart, ystart, xend, yend = self._get_simple_image_params(zoom) + elif self.tile_bounds.epsg == EPSG.EPSG4326: + xstart, ystart, xend, yend = self._get_3857_image_params( + zoom, self.tile_bounds + ) + elif self.tile_bounds.epsg == EPSG.EPSG3857: + # transform to 4326 + transformer = EPSGTransformer.create_geo_to_geo_transformer( + EPSG.EPSG3857, EPSG.EPSG4326 + ) + transforming_bounds = [ + transformer(self.tile_bounds.bounds[0]), + transformer(self.tile_bounds.bounds[1]), + ] + xstart, ystart, xend, yend = self._get_3857_image_params( + zoom, transforming_bounds + ) + else: + raise ValueError(f"Unsupported epsg found: {self.tile_bounds.epsg}") + + self._validate_num_tiles(xstart, ystart, xend, yend, max_tiles) + + rounded_tiles, pixel_offsets = list( + zip( + *[ + self._tile_to_pixel(pt) + for pt in [xstart, ystart, xend, yend] + ] + ) + ) + + image = self._fetch_image_for_bounds(*rounded_tiles, zoom, multithread) + arr = self._crop_to_bounds(image, *pixel_offsets) + return RasterData(arr=arr) + + @property + def value(self) -> np.ndarray: + """Returns the value of a generated RasterData object.""" + return self.raster_data( + self.zoom_levels[0], multithread=self.multithread + ).value + + def _get_simple_image_params( + self, zoom + ) -> Tuple[float, float, float, float]: + """Computes the x and y tile bounds for fetching an image that + captures the entire labeling region (TiledData.bounds) given a specific zoom + + Simple has different order of x / y than lat / lng because of how leaflet behaves + leaflet reports all points as pixel locations at a zoom of 0 + """ + xend, xstart, yend, ystart = ( + self.tile_bounds.bounds[1].x, + self.tile_bounds.bounds[0].x, + self.tile_bounds.bounds[1].y, + self.tile_bounds.bounds[0].y, + ) + return ( + *[ + x * (2 ** (zoom)) / self.tile_size + for x in [xstart, ystart, xend, yend] + ], + ) + + def _get_3857_image_params( + self, zoom: int, bounds: TiledBounds + ) -> Tuple[float, float, float, float]: + """Computes the x and y tile bounds for fetching an image that + captures the entire labeling region (TiledData.bounds) given a specific zoom + """ + lat_start, lat_end = bounds.bounds[1].y, bounds.bounds[0].y + lng_start, lng_end = bounds.bounds[1].x, bounds.bounds[0].x + + # Convert to zoom 0 tile coordinates + xstart, ystart = self._latlng_to_tile(lat_start, lng_start) + xend, yend = self._latlng_to_tile(lat_end, lng_end) + + # Make sure that the tiles are increasing in order + xstart, xend = min(xstart, xend), max(xstart, xend) + ystart, yend = min(ystart, yend), max(ystart, yend) + return (*[pt * 2.0**zoom for pt in [xstart, ystart, xend, yend]],) + + def _latlng_to_tile( + self, lat: float, lng: float, zoom=0 + ) -> Tuple[float, float]: + """Converts lat/lng to 3857 tile coordinates + Formula found here: + https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#lon.2Flat_to_tile_numbers_2 + """ + scale = 2**zoom + lat_rad = math.radians(lat) + x = (lng + 180.0) / 360.0 * scale + y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale + return x, y + + def _tile_to_pixel(self, tile: float) -> Tuple[int, int]: + """Rounds a tile coordinate and reports the remainder in pixels""" + rounded_tile = int(tile) + remainder = tile - rounded_tile + pixel_offset = int(self.tile_size * remainder) + return rounded_tile, pixel_offset + + def _fetch_image_for_bounds( + self, + x_tile_start: int, + y_tile_start: int, + x_tile_end: int, + y_tile_end: int, + zoom: int, + multithread=True, + ) -> 
np.ndarray: + """Fetches the tiles and combines them into a single image. + + If a tile cannot be fetched, a padding of expected tile size is instead added. + """ + + if multithread: + tiles = {} + with ThreadPoolExecutor( + max_workers=TILE_DOWNLOAD_CONCURRENCY + ) as exc: + for x in range(x_tile_start, x_tile_end + 1): + for y in range(y_tile_start, y_tile_end + 1): + tiles[(x, y)] = exc.submit(self._fetch_tile, x, y, zoom) + + rows = [] + for y in range(y_tile_start, y_tile_end + 1): + row = [] + for x in range(x_tile_start, x_tile_end + 1): + try: + if multithread: + row.append(tiles[(x, y)].result()) + else: + row.append(self._fetch_tile(x, y, zoom)) + except: + row.append( + np.zeros( + shape=(self.tile_size, self.tile_size, 3), + dtype=np.uint8, + ) + ) + rows.append(np.hstack(row)) + + return np.vstack(rows) + + @retry.Retry(initial=1, maximum=16, multiplier=2) + def _fetch_tile(self, x: int, y: int, z: int) -> np.ndarray: + """ + Fetches the image and returns an np array. + """ + data = requests.get(self.tile_layer.url.format(x=x, y=y, z=z)) + data.raise_for_status() + decoded = np.array(Image.open(BytesIO(data.content)))[..., :3] + if decoded.shape[:2] != (self.tile_size, self.tile_size): + logger.warning(f"Unexpected tile size {decoded.shape}.") + return decoded + + def _crop_to_bounds( + self, + image: np.ndarray, + x_px_start: int, + y_px_start: int, + x_px_end: int, + y_px_end: int, + ) -> np.ndarray: + """This function slices off the excess pixels that are outside of the bounds. + This occurs because only full tiles can be downloaded at a time. + """ + + def invert_point(pt): + # Must have at least 1 pixel for stability. + pt = max(pt, 1) + # All pixel points are relative to a single tile + # So subtracting the tile size inverts the axis + pt = pt - self.tile_size + return pt if pt != 0 else None + + x_px_end, y_px_end = invert_point(x_px_end), invert_point(y_px_end) + return image[y_px_start:y_px_end, x_px_start:x_px_end, :] + + def _validate_num_tiles( + self, + xstart: float, + ystart: float, + xend: float, + yend: float, + max_tiles: int, + ): + """Calculates the number of expected tiles we would fetch. + + If this is greater than the number of max tiles, raise an error. + """ + total_n_tiles = (yend - ystart + 1) * (xend - xstart + 1) + if total_n_tiles > max_tiles: + raise ValueError( + f"Requested zoom results in {total_n_tiles} tiles." + f"Max allowed tiles are {max_tiles}" + f"Increase max tiles or reduce zoom level." + ) + + @field_validator("zoom_levels") + def validate_zoom_levels(cls, zoom_levels): + if zoom_levels[0] > zoom_levels[1]: + raise ValueError( + f"Order of zoom levels should be min, max. Received {zoom_levels}" + ) + return zoom_levels + + class EPSGTransformer(BaseModel): """Transformer class between different EPSG's. Useful when wanting to project in different formats. 
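A minimal usage sketch of the re-added classes, mirroring the docstrings above and the test added below (the `Point` import path is assumed from `labelbox.types`; values are placeholders):

    from labelbox.types import Point
    from labelbox.data.annotation_types.data.tiled_image import (
        EPSG,
        TiledBounds,
        TiledImageData,
        TileLayer,
    )

    # The url must contain the /{z}/{x}/{y} placeholders or validate_url raises.
    tile_layer = TileLayer(
        url="https://c.tile.openstreetmap.org/{z}/{x}/{y}.png",
        name="slippy map tile",
    )
    tile_bounds = TiledBounds(
        epsg=EPSG.EPSG4326, bounds=[Point(x=0, y=0), Point(x=5, y=5)]
    )
    # zoom_levels is ordered (min, max); validate_zoom_levels rejects a reversed pair.
    tiled_image_data = TiledImageData(
        tile_layer=tile_layer,
        tile_bounds=tile_bounds,
        zoom_levels=(1, 10),
    )
    assert tiled_image_data.asdict()["minZoom"] == 1
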
diff --git a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py index 9b96c9445..46f2383d6 100644 --- a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py +++ b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py @@ -6,6 +6,8 @@ from labelbox.data.annotation_types.data.tiled_image import ( EPSG, TiledBounds, + TileLayer, + TiledImageData, EPSGTransformer, ) from pydantic import ValidationError @@ -35,6 +37,27 @@ def test_tiled_bounds_same(epsg): ) +def test_create_tiled_image_data(): + bounds_points = [Point(x=0, y=0), Point(x=5, y=5)] + url = ( + "https://labelbox.s3-us-west-2.amazonaws.com/pathology/{z}/{x}/{y}.png" + ) + zoom_levels = (1, 10) + + tile_layer = TileLayer(url=url, name="slippy map tile") + tile_bounds = TiledBounds(epsg=EPSG.EPSG4326, bounds=bounds_points) + tiled_image_data = TiledImageData( + tile_layer=tile_layer, + tile_bounds=tile_bounds, + zoom_levels=zoom_levels, + version=2, + ) + assert isinstance(tiled_image_data, TiledImageData) + assert tiled_image_data.tile_bounds.bounds == bounds_points + assert tiled_image_data.tile_layer.url == url + assert tiled_image_data.zoom_levels == zoom_levels + + def test_epsg_point_projections(): zoom = 4 From b614cedc41ea07dd35cc69c5e777ad7206a2407e Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:58:36 -0500 Subject: [PATCH 21/44] Removed data types besides generic data row data --- .../data/annotation_types/__init__.py | 14 +- .../data/annotation_types/data/__init__.py | 12 +- .../data/annotation_types/data/audio.py | 7 - .../annotation_types/data/conversation.py | 7 - .../data/annotation_types/data/dicom.py | 7 - .../data/annotation_types/data/document.py | 7 - .../data/annotation_types/data/html.py | 7 - .../data/llm_prompt_creation.py | 7 - .../data/llm_prompt_response_creation.py | 9 - .../data/llm_response_creation.py | 7 - .../data/annotation_types/data/raster.py | 7 +- .../data/annotation_types/data/text.py | 116 ------- .../data/annotation_types/data/tiled_image.py | 294 ------------------ .../data/annotation_types/data/video.py | 173 ----------- .../labelbox/data/annotation_types/label.py | 42 +-- .../serialization/ndjson/classification.py | 12 +- .../data/serialization/ndjson/label.py | 40 +-- .../data/serialization/ndjson/metric.py | 8 +- .../labelbox/data/serialization/ndjson/mmc.py | 3 +- .../data/serialization/ndjson/objects.py | 29 +- .../data/serialization/ndjson/relationship.py | 4 +- libs/labelbox/src/labelbox/utils.py | 4 +- .../data/annotation_types/test_collection.py | 16 +- .../serialization/ndjson/test_checklist.py | 14 +- .../data/serialization/ndjson/test_image.py | 3 +- .../data/serialization/ndjson/test_radio.py | 8 +- .../data/serialization/ndjson/test_text.py | 5 +- 27 files changed, 59 insertions(+), 803 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/audio.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/document.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/html.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py delete mode 100644 
libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/text.py delete mode 100644 libs/labelbox/src/labelbox/data/annotation_types/data/video.py diff --git a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py index 7908bc242..84d6d65a5 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py @@ -32,18 +32,8 @@ from .classification import Radio from .classification import Text -from .data import AudioData -from .data import ConversationData -from .data import DicomData -from .data import DocumentData -from .data import HTMLData -from .data import ImageData +from .data import GenericDataRowData from .data import MaskData -from .data import TextData -from .data import VideoData -from .data import LlmPromptResponseCreationData -from .data import LlmPromptCreationData -from .data import LlmResponseCreationData from .label import Label from .collection import LabelGenerator @@ -58,8 +48,6 @@ from .data.tiled_image import EPSG from .data.tiled_image import EPSGTransformer from .data.tiled_image import TiledBounds -from .data.tiled_image import TiledImageData -from .data.tiled_image import TileLayer from .llm_prompt_response.prompt import PromptText from .llm_prompt_response.prompt import PromptClassificationAnnotation diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py index 2522b2741..8d5e7289b 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py @@ -1,12 +1,2 @@ -from .audio import AudioData -from .conversation import ConversationData -from .dicom import DicomData -from .document import DocumentData -from .html import HTMLData -from .raster import ImageData from .raster import MaskData -from .text import TextData -from .video import VideoData -from .llm_prompt_response_creation import LlmPromptResponseCreationData -from .llm_prompt_creation import LlmPromptCreationData -from .llm_response_creation import LlmResponseCreationData +from .generic_data_row_data import GenericDataRowData diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py b/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py deleted file mode 100644 index 916fca99d..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class AudioData(BaseData, _NoCoercionMixin): - class_name: Literal["AudioData"] = "AudioData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py deleted file mode 100644 index ef6507dca..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class ConversationData(BaseData, _NoCoercionMixin): - class_name: Literal["ConversationData"] = "ConversationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py 
b/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py deleted file mode 100644 index ae4c377dc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DicomData(BaseData, _NoCoercionMixin): - class_name: Literal["DicomData"] = "DicomData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py b/libs/labelbox/src/labelbox/data/annotation_types/data/document.py deleted file mode 100644 index 810a3ed3e..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DocumentData(BaseData, _NoCoercionMixin): - class_name: Literal["DocumentData"] = "DocumentData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py b/libs/labelbox/src/labelbox/data/annotation_types/data/html.py deleted file mode 100644 index 7a78fcb7b..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class HTMLData(BaseData, _NoCoercionMixin): - class_name: Literal["HTMLData"] = "HTMLData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py deleted file mode 100644 index a1b0450bc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptCreationData"] = "LlmPromptCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py deleted file mode 100644 index a8dfce894..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py +++ /dev/null @@ -1,9 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptResponseCreationData"] = ( - "LlmPromptResponseCreationData" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py deleted file mode 100644 index a8963ed3f..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmResponseCreationData"] = "LlmResponseCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py index cfdc4e2f1..3a4e8bb6e 100644 --- 
a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py @@ -11,8 +11,10 @@ from requests.exceptions import ConnectTimeout from typing_extensions import Literal +from pydantic import BaseModel, model_validator, ConfigDict +from labelbox.exceptions import InternalServerError + from ..types import TypedArray -from .base_data import BaseData class RasterData(BaseModel, ABC): @@ -222,6 +224,3 @@ class MaskData(RasterData): url: Optional[str] = None arr: Optional[TypedArray[Literal['uint8']]] = None """ - - -class ImageData(RasterData, BaseData): ... diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py b/libs/labelbox/src/labelbox/data/annotation_types/data/text.py deleted file mode 100644 index cabad4836..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py +++ /dev/null @@ -1,116 +0,0 @@ -from typing import Callable, Optional - -import requests -from google.api_core import retry -from lbox.exceptions import InternalServerError -from pydantic import ConfigDict, model_validator -from requests.exceptions import ConnectTimeout - -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin - -from .base_data import BaseData - - -class TextData(BaseData, _NoCoercionMixin): - """ - Represents text data. Requires arg file_path, text, or url - - >>> TextData(text="") - - Args: - file_path (str) - text (str) - url (str) - """ - - class_name: Literal["TextData"] = "TextData" - file_path: Optional[str] = None - text: Optional[str] = None - url: Optional[str] = None - model_config = ConfigDict(extra="forbid") - - @property - def value(self) -> str: - """ - Property that unifies the data access pattern for all references to the text. - - Returns: - string representation of the text - """ - if self.text: - return self.text - elif self.file_path: - with open(self.file_path, "r") as file: - text = file.read() - self.text = text - return text - elif self.url: - text = self.fetch_remote() - self.text = text - return text - else: - raise ValueError("Must set either url, file_path or im_bytes") - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry( - deadline=15.0, - predicate=retry.if_exception_type(ConnectTimeout, InternalServerError), - ) - def fetch_remote(self) -> str: - """ - Method for accessing url. - - If url is not publicly accessible or requires another access pattern - simply override this function - """ - response = requests.get(self.url) - if response.status_code in [500, 502, 503, 504]: - raise InternalServerError(response.text) - response.raise_for_status() - return response.text - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other text references. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - url for the text - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.text is not None: - self.url = signer(self.text.encode()) - else: - raise ValueError( - "One of url, im_bytes, file_path, numpy must not be None." 
- ) - return self.url - - @model_validator(mode="after") - def validate_date(self, values): - file_path = self.file_path - text = self.text - url = self.url - uid = self.uid - global_key = self.global_key - if uid == file_path == text == url == global_key is None: - raise ValueError( - "One of `file_path`, `text`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"TextData(file_path={self.file_path}," - f"text={self.text[:30] + '...' if self.text is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py index adb8db549..cdb7f4127 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py @@ -88,300 +88,6 @@ def validate_bounds_lat_lng(self): return self -class TileLayer(BaseModel): - """Url that contains the tile layer. Must be in the format: - - https://c.tile.openstreetmap.org/{z}/{x}/{y}.png - - >>> layer = TileLayer( - url="https://c.tile.openstreetmap.org/{z}/{x}/{y}.png", - name="slippy map tile" - ) - """ - - url: str - name: Optional[str] = "default" - - def asdict(self) -> Dict[str, str]: - return {"tileLayerUrl": self.url, "name": self.name} - - @field_validator("url") - def validate_url(cls, url): - xyz_format = "/{z}/{x}/{y}" - if xyz_format not in url: - raise ValueError(f"{url} needs to contain {xyz_format}") - return url - - -class TiledImageData(BaseData): - """Represents tiled imagery - - If specified version is 2, converts bounds from [lng,lat] to [lat,lng] - - Requires the following args: - tile_layer: TileLayer - tile_bounds: TiledBounds - zoom_levels: List[int] - Optional args: - max_native_zoom: int = None - tile_size: Optional[int] - version: int = 2 - alternative_layers: List[TileLayer] - - >>> tiled_image_data = TiledImageData(tile_layer=TileLayer, - tile_bounds=TiledBounds, - zoom_levels=[1, 12]) - """ - - tile_layer: TileLayer - tile_bounds: TiledBounds - alternative_layers: List[TileLayer] = [] - zoom_levels: Tuple[int, int] - max_native_zoom: Optional[int] = None - tile_size: Optional[int] = DEFAULT_TMS_TILE_SIZE - version: Optional[int] = 2 - multithread: bool = True - - def __post_init__(self) -> None: - if self.max_native_zoom is None: - self.max_native_zoom = self.zoom_levels[0] - - def asdict(self) -> Dict[str, str]: - return { - "tileLayerUrl": self.tile_layer.url, - "bounds": [ - [self.tile_bounds.bounds[0].x, self.tile_bounds.bounds[0].y], - [self.tile_bounds.bounds[1].x, self.tile_bounds.bounds[1].y], - ], - "minZoom": self.zoom_levels[0], - "maxZoom": self.zoom_levels[1], - "maxNativeZoom": self.max_native_zoom, - "epsg": self.tile_bounds.epsg.name, - "tileSize": self.tile_size, - "alternativeLayers": [ - layer.asdict() for layer in self.alternative_layers - ], - "version": self.version, - } - - def raster_data( - self, zoom: int = 0, max_tiles: int = 32, multithread=True - ) -> RasterData: - """Converts the tiled image asset into a RasterData object containing an - np.ndarray. - - Uses the minimum zoom provided to render the image. 
- """ - if self.tile_bounds.epsg == EPSG.SIMPLEPIXEL: - xstart, ystart, xend, yend = self._get_simple_image_params(zoom) - elif self.tile_bounds.epsg == EPSG.EPSG4326: - xstart, ystart, xend, yend = self._get_3857_image_params( - zoom, self.tile_bounds - ) - elif self.tile_bounds.epsg == EPSG.EPSG3857: - # transform to 4326 - transformer = EPSGTransformer.create_geo_to_geo_transformer( - EPSG.EPSG3857, EPSG.EPSG4326 - ) - transforming_bounds = [ - transformer(self.tile_bounds.bounds[0]), - transformer(self.tile_bounds.bounds[1]), - ] - xstart, ystart, xend, yend = self._get_3857_image_params( - zoom, transforming_bounds - ) - else: - raise ValueError(f"Unsupported epsg found: {self.tile_bounds.epsg}") - - self._validate_num_tiles(xstart, ystart, xend, yend, max_tiles) - - rounded_tiles, pixel_offsets = list( - zip( - *[ - self._tile_to_pixel(pt) - for pt in [xstart, ystart, xend, yend] - ] - ) - ) - - image = self._fetch_image_for_bounds(*rounded_tiles, zoom, multithread) - arr = self._crop_to_bounds(image, *pixel_offsets) - return RasterData(arr=arr) - - @property - def value(self) -> np.ndarray: - """Returns the value of a generated RasterData object.""" - return self.raster_data( - self.zoom_levels[0], multithread=self.multithread - ).value - - def _get_simple_image_params( - self, zoom - ) -> Tuple[float, float, float, float]: - """Computes the x and y tile bounds for fetching an image that - captures the entire labeling region (TiledData.bounds) given a specific zoom - - Simple has different order of x / y than lat / lng because of how leaflet behaves - leaflet reports all points as pixel locations at a zoom of 0 - """ - xend, xstart, yend, ystart = ( - self.tile_bounds.bounds[1].x, - self.tile_bounds.bounds[0].x, - self.tile_bounds.bounds[1].y, - self.tile_bounds.bounds[0].y, - ) - return ( - *[ - x * (2 ** (zoom)) / self.tile_size - for x in [xstart, ystart, xend, yend] - ], - ) - - def _get_3857_image_params( - self, zoom: int, bounds: TiledBounds - ) -> Tuple[float, float, float, float]: - """Computes the x and y tile bounds for fetching an image that - captures the entire labeling region (TiledData.bounds) given a specific zoom - """ - lat_start, lat_end = bounds.bounds[1].y, bounds.bounds[0].y - lng_start, lng_end = bounds.bounds[1].x, bounds.bounds[0].x - - # Convert to zoom 0 tile coordinates - xstart, ystart = self._latlng_to_tile(lat_start, lng_start) - xend, yend = self._latlng_to_tile(lat_end, lng_end) - - # Make sure that the tiles are increasing in order - xstart, xend = min(xstart, xend), max(xstart, xend) - ystart, yend = min(ystart, yend), max(ystart, yend) - return (*[pt * 2.0**zoom for pt in [xstart, ystart, xend, yend]],) - - def _latlng_to_tile( - self, lat: float, lng: float, zoom=0 - ) -> Tuple[float, float]: - """Converts lat/lng to 3857 tile coordinates - Formula found here: - https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#lon.2Flat_to_tile_numbers_2 - """ - scale = 2**zoom - lat_rad = math.radians(lat) - x = (lng + 180.0) / 360.0 * scale - y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale - return x, y - - def _tile_to_pixel(self, tile: float) -> Tuple[int, int]: - """Rounds a tile coordinate and reports the remainder in pixels""" - rounded_tile = int(tile) - remainder = tile - rounded_tile - pixel_offset = int(self.tile_size * remainder) - return rounded_tile, pixel_offset - - def _fetch_image_for_bounds( - self, - x_tile_start: int, - y_tile_start: int, - x_tile_end: int, - y_tile_end: int, - zoom: int, - multithread=True, - ) -> 
np.ndarray: - """Fetches the tiles and combines them into a single image. - - If a tile cannot be fetched, a padding of expected tile size is instead added. - """ - - if multithread: - tiles = {} - with ThreadPoolExecutor( - max_workers=TILE_DOWNLOAD_CONCURRENCY - ) as exc: - for x in range(x_tile_start, x_tile_end + 1): - for y in range(y_tile_start, y_tile_end + 1): - tiles[(x, y)] = exc.submit(self._fetch_tile, x, y, zoom) - - rows = [] - for y in range(y_tile_start, y_tile_end + 1): - row = [] - for x in range(x_tile_start, x_tile_end + 1): - try: - if multithread: - row.append(tiles[(x, y)].result()) - else: - row.append(self._fetch_tile(x, y, zoom)) - except: - row.append( - np.zeros( - shape=(self.tile_size, self.tile_size, 3), - dtype=np.uint8, - ) - ) - rows.append(np.hstack(row)) - - return np.vstack(rows) - - @retry.Retry(initial=1, maximum=16, multiplier=2) - def _fetch_tile(self, x: int, y: int, z: int) -> np.ndarray: - """ - Fetches the image and returns an np array. - """ - data = requests.get(self.tile_layer.url.format(x=x, y=y, z=z)) - data.raise_for_status() - decoded = np.array(Image.open(BytesIO(data.content)))[..., :3] - if decoded.shape[:2] != (self.tile_size, self.tile_size): - logger.warning(f"Unexpected tile size {decoded.shape}.") - return decoded - - def _crop_to_bounds( - self, - image: np.ndarray, - x_px_start: int, - y_px_start: int, - x_px_end: int, - y_px_end: int, - ) -> np.ndarray: - """This function slices off the excess pixels that are outside of the bounds. - This occurs because only full tiles can be downloaded at a time. - """ - - def invert_point(pt): - # Must have at least 1 pixel for stability. - pt = max(pt, 1) - # All pixel points are relative to a single tile - # So subtracting the tile size inverts the axis - pt = pt - self.tile_size - return pt if pt != 0 else None - - x_px_end, y_px_end = invert_point(x_px_end), invert_point(y_px_end) - return image[y_px_start:y_px_end, x_px_start:x_px_end, :] - - def _validate_num_tiles( - self, - xstart: float, - ystart: float, - xend: float, - yend: float, - max_tiles: int, - ): - """Calculates the number of expected tiles we would fetch. - - If this is greater than the number of max tiles, raise an error. - """ - total_n_tiles = (yend - ystart + 1) * (xend - xstart + 1) - if total_n_tiles > max_tiles: - raise ValueError( - f"Requested zoom results in {total_n_tiles} tiles." - f"Max allowed tiles are {max_tiles}" - f"Increase max tiles or reduce zoom level." - ) - - @field_validator("zoom_levels") - def validate_zoom_levels(cls, zoom_levels): - if zoom_levels[0] > zoom_levels[1]: - raise ValueError( - f"Order of zoom levels should be min, max. Received {zoom_levels}" - ) - return zoom_levels - - class EPSGTransformer(BaseModel): """Transformer class between different EPSG's. Useful when wanting to project in different formats. 
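A migration sketch for callers of the deleted data classes (assumes the data row already exists in Labelbox; ids are placeholders): instead of wrapping asset content in `TextData`, `VideoData`, and the like, reference the data row directly:

    from labelbox.data.annotation_types import GenericDataRowData, Label

    # Before this patch: Label(data=TextData(uid=..., text=...))
    # After: point at the data row by uid or global_key.
    label = Label(
        data=GenericDataRowData(uid="bkj7z2q0b0000jx6x0q2q7q0d"),
        annotations=[],
    )

    # A dict with a single key also works; Label.validate_data coerces it
    # to GenericDataRowData.
    label_by_key = Label(
        data={"global_key": "sample-global-key"},
        annotations=[],
    )
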
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py b/libs/labelbox/src/labelbox/data/annotation_types/data/video.py deleted file mode 100644 index 0f40911d8..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py +++ /dev/null @@ -1,173 +0,0 @@ -import logging -import os -import urllib.request -from typing import Callable, Dict, Generator, Optional, Tuple -from typing_extensions import Literal -from uuid import uuid4 - -import cv2 -import numpy as np -from google.api_core import retry - -from .base_data import BaseData -from ..types import TypedArray - -from pydantic import ConfigDict, model_validator - -logger = logging.getLogger(__name__) - - -class VideoData(BaseData): - """ - Represents video - """ - - file_path: Optional[str] = None - url: Optional[str] = None - frames: Optional[Dict[int, TypedArray[Literal["uint8"]]]] = None - # Required for discriminating between data types - model_config = ConfigDict(extra="forbid") - - def load_frames(self, overwrite: bool = False) -> None: - """ - Loads all frames into memory at once in order to access in non-sequential order. - This will use a lot of memory, especially for longer videos - - Args: - overwrite: Replace existing frames - """ - if self.frames and not overwrite: - return - - for count, frame in self.frame_generator(): - if self.frames is None: - self.frames = {} - self.frames[count] = frame - - @property - def value(self): - return self.frame_generator() - - def frame_generator( - self, cache_frames=False, download_dir="/tmp" - ) -> Generator[Tuple[int, np.ndarray], None, None]: - """ - A generator for accessing individual frames in a video. - - Args: - cache_frames (bool): Whether or not to cache frames while iterating through the video. - download_dir (str): Directory to save the video to. Defaults to `/tmp` dir - """ - if self.frames is not None: - for idx, frame in self.frames.items(): - yield idx, frame - return - elif self.url and not self.file_path: - file_path = os.path.join(download_dir, f"{uuid4()}.mp4") - logger.info("Downloading the video locally to %s", file_path) - self.fetch_remote(file_path) - self.file_path = file_path - - vidcap = cv2.VideoCapture(self.file_path) - - success, frame = vidcap.read() - count = 0 - if cache_frames: - self.frames = {} - while success: - frame = frame[:, :, ::-1] - yield count, frame - if cache_frames: - self.frames[count] = frame - success, frame = vidcap.read() - count += 1 - - def __getitem__(self, idx: int) -> np.ndarray: - if self.frames is None: - raise ValueError( - "Cannot select by index without iterating over the entire video or loading all frames." - ) - return self.frames[idx] - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry(deadline=15.0) - def fetch_remote(self, local_path) -> None: - """ - Method for downloading data from self.url - - If url is not publicly accessible or requires another access pattern - simply override this function - - Args: - local_path: Where to save the thing too. - """ - urllib.request.urlretrieve(self.url, local_path) - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other video references. - - Args: - signer: A function that accepts bytes and returns a signed url. 
- Returns: - url for the video - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.frames is not None: - self.file_path = self.frames_to_video(self.frames) - self.url = self.create_url(signer) - else: - raise ValueError("One of url, file_path, frames must not be None.") - return self.url - - def frames_to_video( - self, frames: Dict[int, np.ndarray], fps=20, save_dir="/tmp" - ) -> str: - """ - Compresses the data by converting a set of individual frames to a single video. - - """ - file_path = os.path.join(save_dir, f"{uuid4()}.mp4") - out = None - for key in frames.keys(): - frame = frames[key] - if out is None: - out = cv2.VideoWriter( - file_path, - cv2.VideoWriter_fourcc(*"MP4V"), - fps, - frame.shape[:2], - ) - out.write(frame) - if out is None: - return - out.release() - return file_path - - @model_validator(mode="after") - def validate_data(self): - file_path = self.file_path - url = self.url - frames = self.frames - uid = self.uid - global_key = self.global_key - - if uid == file_path == frames == url == global_key is None: - raise ValueError( - "One of `file_path`, `frames`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"VideoData(file_path={self.file_path}," - f"frames={'...' if self.frames is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 7eef43f31..9d5b92bdd 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -6,7 +6,6 @@ from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) -from labelbox.data.annotation_types.data.tiled_image import TiledImageData from labelbox.schema import ontology from ...annotated_types import Cuid @@ -14,19 +13,6 @@ from .relationship import RelationshipAnnotation from .llm_prompt_response.prompt import PromptClassificationAnnotation from .classification import ClassificationAnswer -from .data import ( - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - ImageData, - TextData, - VideoData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, -) from .geometry import Mask from .metrics import ScalarMetric, ConfusionMatrixMetric from .video import VideoClassificationAnnotation @@ -35,22 +21,6 @@ from ..ontology import get_feature_schema_lookup from pydantic import BaseModel, field_validator, model_serializer -DataType = Union[ - VideoData, - ImageData, - TextData, - TiledImageData, - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, - GenericDataRowData, -] - class Label(BaseModel): """Container for holding data and annotations @@ -67,14 +37,13 @@ class Label(BaseModel): Args: uid: Optional Label Id in Labelbox - data: Data of Label, Image, Video, Text or dict with a single key uid | global_key | external_id. - Note use of classes as data is deprecated. Use GenericDataRowData or dict with a single key instead. + data: GenericDataRowData or dict with a single key uid | global_key | external_id. 
annotations: List of Annotations in the label extra: additional context """ uid: Optional[Cuid] = None - data: DataType + data: GenericDataRowData annotations: List[ Union[ ClassificationAnnotation, @@ -94,13 +63,6 @@ class Label(BaseModel): def validate_data(cls, data): if isinstance(data, Dict): return GenericDataRowData(**data) - elif isinstance(data, GenericDataRowData): - return data - else: - warnings.warn( - f"Using {type(data).__name__} class for label.data is deprecated. " - "Use a dict or an instance of GenericDataRowData instead." - ) return data def object_annotations(self) -> List[ObjectAnnotation]: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py index 2c3215265..86cf0d094 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, Union, Optional -from labelbox.data.annotation_types import ImageData, TextData, VideoData +from labelbox.data.annotation_types import GenericDataRowData from labelbox.data.mixins import ( ConfidenceMixin, CustomMetric, @@ -232,7 +232,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[TextData, ImageData], + data: Union[GenericDataRowData], message_id: str, confidence: Optional[float] = None, ) -> "NDText": @@ -264,7 +264,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: Union[GenericDataRowData], message_id: str, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, @@ -304,7 +304,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, message_id: str, confidence: Optional[float] = None, ) -> "NDRadio": @@ -427,7 +427,7 @@ def from_common( annotation: Union[ ClassificationAnnotation, VideoClassificationAnnotation ], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: classify_obj = cls.lookup_classification(annotation) if classify_obj is None: @@ -475,7 +475,7 @@ def to_common( def from_common( cls, annotation: Union[PromptClassificationAnnotation], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: return NDPromptText.from_common( str(annotation._uuid), diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 7039ae834..ffaefb4d7 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -14,7 +14,6 @@ ) from ...annotation_types.video import VideoObjectAnnotation, VideoMaskAnnotation from ...annotation_types.collection import LabelCollection, LabelGenerator -from ...annotation_types.data import DicomData, ImageData, TextData, VideoData from ...annotation_types.data.generic_data_row_data import GenericDataRowData from ...annotation_types.label import Label from ...annotation_types.ner import TextEntity, ConversationEntity @@ -214,46 +213,9 @@ def _generate_annotations( yield Label( annotations=annotations, - data=self._infer_media_type(group.data_row, annotations), + 
data=GenericDataRowData(uid=group.data_row.id) if group.data_row.id else GenericDataRowData(global_key=group.data_row.global_key),
         )
 
-    def _infer_media_type(
-        self,
-        data_row: DataRow,
-        annotations: List[
-            Union[
-                TextEntity,
-                ConversationEntity,
-                VideoClassificationAnnotation,
-                DICOMObjectAnnotation,
-                VideoObjectAnnotation,
-                ObjectAnnotation,
-                ClassificationAnnotation,
-                ScalarMetric,
-                ConfusionMatrixMetric,
-            ]
-        ],
-    ) -> Union[TextData, VideoData, ImageData]:
-        if len(annotations) == 0:
-            raise ValueError("Missing annotations while inferring media type")
-
-        types = {type(annotation) for annotation in annotations}
-        data = GenericDataRowData
-        if (TextEntity in types) or (ConversationEntity in types):
-            data = TextData
-        elif (
-            VideoClassificationAnnotation in types
-            or VideoObjectAnnotation in types
-        ):
-            data = VideoData
-        elif DICOMObjectAnnotation in types:
-            data = DicomData
-
-        if data_row.id:
-            return data(uid=data_row.id)
-        else:
-            return data(global_key=data_row.global_key)
-
     @staticmethod
     def _get_consecutive_frames(
         frames_indices: List[int],
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py
index b28e575cf..f8b522ab5 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py
@@ -1,6 +1,6 @@
 from typing import Optional, Union, Type
 
-from labelbox.data.annotation_types.data import ImageData, TextData
+from labelbox.data.annotation_types.data import GenericDataRowData
 from labelbox.data.serialization.ndjson.base import DataRow, NDJsonBase
 from labelbox.data.annotation_types.metrics.scalar import (
     ScalarMetric,
@@ -51,7 +51,7 @@ def to_common(self) -> ConfusionMatrixMetric:
 
     @classmethod
     def from_common(
-        cls, metric: ConfusionMatrixMetric, data: Union[TextData, ImageData]
+        cls, metric: ConfusionMatrixMetric, data: GenericDataRowData
     ) -> "NDConfusionMatrixMetric":
         return cls(
             uuid=metric.extra.get("uuid"),
@@ -83,7 +83,7 @@ def to_common(self) -> ScalarMetric:
 
     @classmethod
     def from_common(
-        cls, metric: ScalarMetric, data: Union[TextData, ImageData]
+        cls, metric: ScalarMetric, data: GenericDataRowData
     ) -> "NDScalarMetric":
         return cls(
             uuid=metric.extra.get("uuid"),
@@ -107,7 +107,7 @@ def to_common(
     def from_common(
         cls,
         annotation: Union[ScalarMetric, ConfusionMatrixMetric],
-        data: Union[TextData, ImageData],
+        data: GenericDataRowData,
     ) -> Union[NDScalarMetric, NDConfusionMatrixMetric]:
         obj = cls.lookup_object(annotation)
         return obj.from_common(annotation, data)
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py
index 74d185f45..b2dcfb5b4 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py
@@ -9,6 +9,7 @@
     MessageRankingTask,
     MessageEvaluationTaskAnnotation,
 )
+from ...annotation_types import GenericDataRowData
 
 
 class MessageTaskData(_CamelCaseMixin):
@@ -35,7 +36,7 @@ def to_common(self) -> MessageEvaluationTaskAnnotation:
     def from_common(
         cls,
         annotation: MessageEvaluationTaskAnnotation,
-        data: Any,  # Union[ImageData, TextData],
+        data: GenericDataRowData,
     ) -> "NDMessageTask":
         return cls(
             uuid=str(annotation._uuid),
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py
index 91abface6..1bcba7a89 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py
+++ 
b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Tuple, Union, Optional import base64 +from labelbox.data.annotation_types.data.raster import MaskData from labelbox.data.annotation_types.ner.conversation_entity import ( ConversationEntity, ) @@ -21,9 +22,9 @@ from PIL import Image from labelbox.data.annotation_types import feature -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData -from ...annotation_types.data import ImageData, TextData, MaskData +from ...annotation_types.data import GenericDataRowData from ...annotation_types.ner import ( DocumentEntity, DocumentTextSelection, @@ -96,7 +97,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPoint": @@ -161,7 +162,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDLine": @@ -245,7 +246,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPolygon": @@ -282,7 +283,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -329,7 +330,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -508,7 +509,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[VideoObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -545,7 +546,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[DICOMObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -601,7 +602,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDMask": @@ -706,7 +707,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDTextEntity": @@ -743,7 +744,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDDocumentEntity": @@ -778,7 +779,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = 
None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDConversationEntity": @@ -836,7 +837,7 @@ def from_common( List[List[VideoObjectAnnotation]], VideoMaskAnnotation, ], - data: Union[ImageData, TextData], + data: GenericDataRowData, ) -> Union[ NDLine, NDPoint, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py index 94c8e9879..d558ac244 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py @@ -1,7 +1,7 @@ from typing import Union from pydantic import BaseModel from .base import NDAnnotation, DataRow -from ...annotation_types.data import ImageData, TextData +from ...annotation_types.data import GenericDataRowData from ...annotation_types.relationship import RelationshipAnnotation from ...annotation_types.relationship import Relationship from .objects import NDObjectType @@ -40,7 +40,7 @@ def to_common( def from_common( cls, annotation: RelationshipAnnotation, - data: Union[ImageData, TextData], + data: GenericDataRowData, ) -> "NDRelationship": relationship = annotation.value return cls( diff --git a/libs/labelbox/src/labelbox/utils.py b/libs/labelbox/src/labelbox/utils.py index c76ce188f..dcf51be82 100644 --- a/libs/labelbox/src/labelbox/utils.py +++ b/libs/labelbox/src/labelbox/utils.py @@ -87,8 +87,8 @@ class _NoCoercionMixin: when serializing the object. Example: - class ConversationData(BaseData, _NoCoercionMixin): - class_name: Literal["ConversationData"] = "ConversationData" + class GenericDataRowData(BaseData, _NoCoercionMixin): + class_name: Literal["GenericDataRowData"] = "GenericDataRowData" """ diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 1c9cd669e..c16f61b64 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -7,19 +7,21 @@ from labelbox.data.annotation_types import ( LabelGenerator, ObjectAnnotation, - ImageData, - MaskData, Line, Mask, Point, Label, + GenericDataRowData, + MaskData, ) from labelbox import OntologyBuilder, Tool @pytest.fixture def list_of_labels(): - return [Label(data=ImageData(url="http://someurl")) for _ in range(5)] + return [ + Label(data=GenericDataRowData(url="http://someurl")) for _ in range(5) + ] @pytest.fixture @@ -73,7 +75,7 @@ def test_conversion(list_of_labels): def test_adding_schema_ids(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=GenericDataRowData(uid="123456"), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -93,7 +95,7 @@ def test_adding_schema_ids(): def test_adding_urls(signer): label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData("12345"), annotations=[], ) uuid = str(uuid4()) @@ -106,7 +108,7 @@ def test_adding_urls(signer): def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData("12345"), annotations=[], ) uuid = str(uuid4()) @@ -121,7 +123,7 @@ def test_adding_to_dataset(signer): def test_adding_to_masks(signer): label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData("12345"), annotations=[ 
ObjectAnnotation( name="1234", diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 59f568c75..fb78916f4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -4,7 +4,7 @@ ClassificationAnswer, Radio, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -13,9 +13,8 @@ def test_serialization_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -41,9 +40,8 @@ def test_serialization_min(): def test_serialization_with_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -131,9 +129,8 @@ def test_serialization_with_classification(): def test_serialization_with_classification_double_nested(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -224,9 +221,8 @@ def test_serialization_with_classification_double_nested(): def test_serialization_with_classification_double_nested_2(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index d67acb9c3..4d615658c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -11,7 +11,6 @@ Mask, Label, ObjectAnnotation, - ImageData, MaskData, ) from labelbox.types import Rectangle, Polygon, Point @@ -262,7 +261,7 @@ def test_mask_from_arr(): ), ) ], - data=ImageData(uid="0" * 25), + data=GenericDataRowData(uid="0" * 25), ) res = next(NDJsonConverter.serialize([label])) res.pop("uuid") diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 4458e335c..ec57f0528 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -3,7 +3,7 @@ ClassificationAnswer, ) from labelbox.data.annotation_types.classification.classification import Radio -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -12,9 +12,8 @@ def test_serialization_with_radio_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -43,9 +42,8 @@ def test_serialization_with_radio_min(): def test_serialization_with_radio_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - 
data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 21db389cb..28eba07bd 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -2,7 +2,7 @@ from labelbox.data.annotation_types.classification.classification import ( Text, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -11,9 +11,8 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( From e31d118fc9b7cfe666917b565298b67240d513ae Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:46:16 -0500 Subject: [PATCH 22/44] Fixed video --- libs/labelbox/tests/unit/test_label_data_type.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/libs/labelbox/tests/unit/test_label_data_type.py b/libs/labelbox/tests/unit/test_label_data_type.py index 7bc32e37c..662fa5a5a 100644 --- a/libs/labelbox/tests/unit/test_label_data_type.py +++ b/libs/labelbox/tests/unit/test_label_data_type.py @@ -1,11 +1,7 @@ -from email import message import pytest -from pydantic import ValidationError - from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) -from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.label import Label @@ -42,9 +38,9 @@ def test_video_data_type(): "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", } with pytest.warns(UserWarning, match="Use a dict"): - label = Label(data=VideoData(**data)) + label = Label(data=GenericDataRowData(**data)) data = label.data - assert isinstance(data, VideoData) + assert isinstance(data, GenericDataRowData) assert ( data.global_key == "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr" From 1a8189a254423dfa66964a2f2b1d57bd03061ed7 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:51:12 -0500 Subject: [PATCH 23/44] Removed data type test --- libs/labelbox/tests/unit/test_label_data_type.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/libs/labelbox/tests/unit/test_label_data_type.py b/libs/labelbox/tests/unit/test_label_data_type.py index 662fa5a5a..611324f78 100644 --- a/libs/labelbox/tests/unit/test_label_data_type.py +++ b/libs/labelbox/tests/unit/test_label_data_type.py @@ -33,20 +33,6 @@ def test_generic_data_type_validations(): Label(data=data) -def test_video_data_type(): - data = { - "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", - } - with pytest.warns(UserWarning, match="Use a dict"): - label = Label(data=GenericDataRowData(**data)) - data = label.data - assert isinstance(data, GenericDataRowData) - assert ( - data.global_key - == 
"https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr" - ) - - def test_generic_data_row(): data = { "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", From 62f5fbdbb9d58be60f8327784f3e1da63d2c6005 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:42:52 -0500 Subject: [PATCH 24/44] Made fix --- .../labelbox/tests/data/annotation_types/test_collection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index c16f61b64..26b91fc61 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -95,7 +95,7 @@ def test_adding_schema_ids(): def test_adding_urls(signer): label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[], ) uuid = str(uuid4()) @@ -108,7 +108,7 @@ def test_adding_urls(signer): def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[], ) uuid = str(uuid4()) @@ -123,7 +123,7 @@ def test_adding_to_dataset(signer): def test_adding_to_masks(signer): label = Label( - data=GenericDataRowData("12345"), + data=GenericDataRowData(uid="12345"), annotations=[ ObjectAnnotation( name="1234", From 6c11e746cdf1542cc2fa6155ccc0e7efcfcdf8c7 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:53:19 -0500 Subject: [PATCH 25/44] Fix list of labels --- libs/labelbox/tests/data/annotation_types/test_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 26b91fc61..e7e51e951 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -20,7 +20,7 @@ @pytest.fixture def list_of_labels(): return [ - Label(data=GenericDataRowData(url="http://someurl")) for _ in range(5) + Label(data=GenericDataRowData(uid="http://someurl")) for _ in range(5) ] From 1ef53cc7608e93eb062cb6e833cc3e9aa822827d Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:08:03 -0500 Subject: [PATCH 26/44] Removed add url --- .../data/annotation_types/collection.py | 20 ------------------- .../labelbox/data/annotation_types/label.py | 13 ------------ .../data/annotation_types/test_collection.py | 13 ------------ 3 files changed, 46 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index d90204309..9eb1fe53e 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -40,26 +40,6 @@ def _assign_ids(label: Label): self._fns["assign_feature_schema_ids"] = _assign_ids return self - def add_url_to_data( - self, signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. 
- Returns: - LabelGenerator that signs urls as data is accessed - """ - - def _add_url_to_data(label: Label): - label.add_url_to_data(signer) - return label - - self._fns["add_url_to_data"] = _add_url_to_data - return self - def add_to_dataset( self, dataset: "Entity.Dataset", signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 9d5b92bdd..a18460bc1 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -90,19 +90,6 @@ def frame_annotations( frame_dict[annotation.frame].append(annotation) return frame_dict - def add_url_to_data(self, signer) -> "Label": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - Label with updated references to new data url - """ - self.data.create_url(signer) - return self - def add_url_to_masks(self, signer) -> "Label": """ Creates signed urls for all masks in the Label. diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index e7e51e951..df0d9b007 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -93,18 +93,6 @@ def test_adding_schema_ids(): assert next(generator).annotations[0].feature_schema_id == feature_schema_id -def test_adding_urls(signer): - label = Label( - data=GenericDataRowData(uid="12345"), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_url_to_data(signer(uuid)) - assert label.data.url != uuid - assert next(generator).data.url == uuid - assert label.data.url == uuid - - def test_adding_to_dataset(signer): dataset = FakeDataset() label = Label( @@ -113,7 +101,6 @@ def test_adding_to_dataset(signer): ) uuid = str(uuid4()) generator = LabelGenerator([label]).add_to_dataset(dataset, signer(uuid)) - assert label.data.url != uuid generated_label = next(generator) assert generated_label.data.url == uuid assert generated_label.data.external_id is not None From 159e22711779ecc0a5b411b3f8da49cb6c0845eb Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:35:51 -0500 Subject: [PATCH 27/44] Removed rest of tests --- .../data/annotation_types/collection.py | 21 ----------------- .../data/annotation_types/data/test_raster.py | 13 +++++------ .../data/annotation_types/test_collection.py | 15 ------------ .../data/annotation_types/test_metrics.py | 15 ++++++++---- .../data/annotation_types/test_tiled_image.py | 23 ------------------- .../serialization/ndjson/test_conversation.py | 10 ++++---- .../data/serialization/ndjson/test_dicom.py | 14 ++++++----- .../serialization/ndjson/test_document.py | 2 +- .../serialization/ndjson/test_free_text.py | 6 ++--- .../data/serialization/ndjson/test_video.py | 15 ++++++------ 10 files changed, 41 insertions(+), 93 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index 9eb1fe53e..2e76176a8 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -40,27 +40,6 @@ def _assign_ids(label: Label): 
self._fns["assign_feature_schema_ids"] = _assign_ids return self - def add_to_dataset( - self, dataset: "Entity.Dataset", signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates data rows from each labels data object and attaches the data to the given dataset. - Updates the label's data object to have the same external_id and uid as the data row. - - Args: - dataset: labelbox dataset object to add the new data row to - signer: A function that accepts bytes and returns a signed url. - Returns: - LabelGenerator that updates references to the new data rows as data is accessed - """ - - def _add_to_dataset(label: Label): - label.create_data_row(dataset, signer) - return label - - self._fns["assign_datarow_ids"] = _add_to_dataset - return self - def add_url_to_masks( self, signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/tests/data/annotation_types/data/test_raster.py b/libs/labelbox/tests/data/annotation_types/data/test_raster.py index 6bc8f2bbf..304ed3e95 100644 --- a/libs/labelbox/tests/data/annotation_types/data/test_raster.py +++ b/libs/labelbox/tests/data/annotation_types/data/test_raster.py @@ -5,26 +5,26 @@ import pytest from PIL import Image -from labelbox.data.annotation_types.data import ImageData +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from pydantic import ValidationError def test_validate_schema(): with pytest.raises(ValidationError): - data = ImageData() + GenericDataRowData() def test_im_bytes(): data = (np.random.random((32, 32, 3)) * 255).astype(np.uint8) im_bytes = BytesIO() Image.fromarray(data).save(im_bytes, format="PNG") - raster_data = ImageData(im_bytes=im_bytes.getvalue()) + raster_data = MaskData(im_bytes=im_bytes.getvalue()) data_ = raster_data.value assert np.all(data == data_) def test_im_url(): - raster_data = ImageData(url="https://picsum.photos/id/829/200/300") + raster_data = GenericDataRowData(url="https://picsum.photos/id/829/200/300") data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -32,7 +32,7 @@ def test_im_url(): def test_im_path(): img_path = "/tmp/img.jpg" urllib.request.urlretrieve("https://picsum.photos/id/829/200/300", img_path) - raster_data = ImageData(file_path=img_path) + raster_data = GenericDataRowData(file_path=img_path) data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -42,8 +42,7 @@ def test_ref(): uid = "uid" metadata = [] media_attributes = {} - data = ImageData( - im_bytes=b"", + data = GenericDataRowData( external_id=external_id, uid=uid, metadata=metadata, diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index df0d9b007..f818b94ff 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -93,21 +93,6 @@ def test_adding_schema_ids(): assert next(generator).annotations[0].feature_schema_id == feature_schema_id -def test_adding_to_dataset(signer): - dataset = FakeDataset() - label = Label( - data=GenericDataRowData(uid="12345"), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_to_dataset(dataset, signer(uuid)) - generated_label = next(generator) - assert generated_label.data.url == uuid - assert generated_label.data.external_id is not None - assert generated_label.data.uid == dataset.uid - assert label.data.url == uuid - - def test_adding_to_masks(signer): label = Label( data=GenericDataRowData(uid="12345"), diff --git 
a/libs/labelbox/tests/data/annotation_types/test_metrics.py b/libs/labelbox/tests/data/annotation_types/test_metrics.py index 94c9521a5..4e9355573 100644 --- a/libs/labelbox/tests/data/annotation_types/test_metrics.py +++ b/libs/labelbox/tests/data/annotation_types/test_metrics.py @@ -8,7 +8,11 @@ ConfusionMatrixMetric, ScalarMetric, ) -from labelbox.data.annotation_types import ScalarMetric, Label, ImageData +from labelbox.data.annotation_types import ( + ScalarMetric, + Label, + GenericDataRowData, +) from labelbox.data.annotation_types.metrics.scalar import RESERVED_METRIC_NAMES from pydantic import ValidationError @@ -19,7 +23,8 @@ def test_legacy_scalar_metric(): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -72,7 +77,8 @@ def test_custom_scalar_metric(feature_name, subclass_name, aggregation, value): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -134,7 +140,8 @@ def test_custom_confusison_matrix_metric( assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { diff --git a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py index 46f2383d6..9b96c9445 100644 --- a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py +++ b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py @@ -6,8 +6,6 @@ from labelbox.data.annotation_types.data.tiled_image import ( EPSG, TiledBounds, - TileLayer, - TiledImageData, EPSGTransformer, ) from pydantic import ValidationError @@ -37,27 +35,6 @@ def test_tiled_bounds_same(epsg): ) -def test_create_tiled_image_data(): - bounds_points = [Point(x=0, y=0), Point(x=5, y=5)] - url = ( - "https://labelbox.s3-us-west-2.amazonaws.com/pathology/{z}/{x}/{y}.png" - ) - zoom_levels = (1, 10) - - tile_layer = TileLayer(url=url, name="slippy map tile") - tile_bounds = TiledBounds(epsg=EPSG.EPSG4326, bounds=bounds_points) - tiled_image_data = TiledImageData( - tile_layer=tile_layer, - tile_bounds=tile_bounds, - zoom_levels=zoom_levels, - version=2, - ) - assert isinstance(tiled_image_data, TiledImageData) - assert tiled_image_data.tile_bounds.bounds == bounds_points - assert tiled_image_data.tile_layer.url == url - assert tiled_image_data.zoom_levels == zoom_levels - - def test_epsg_point_projections(): zoom = 4 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 561f9ce86..5aa7285e2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -19,7 +19,7 @@ radio_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="radio", @@ -48,7 +48,7 @@ checklist_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ 
lb_types.ClassificationAnnotation( name="checklist", @@ -78,7 +78,7 @@ ] free_text_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -164,7 +164,7 @@ def test_conversation_entity_import_without_confidence(): def test_benchmark_reference_label_flag_enabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -181,7 +181,7 @@ def test_benchmark_reference_label_flag_enabled(): def test_benchmark_reference_label_flag_disabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 762891aa2..6a00fa871 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -31,7 +31,7 @@ ] polyline_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), + data=lb_types.GenericDataRowData(uid="test-uid"), annotations=dicom_polyline_annotations, ) @@ -58,7 +58,7 @@ } polyline_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=dicom_polyline_annotations, ) @@ -109,11 +109,12 @@ } video_mask_label = lb_types.Label( - data=lb_types.VideoData(uid="test-uid"), annotations=[video_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[video_mask_annotation], ) video_mask_label_with_global_key = lb_types.Label( - data=lb_types.VideoData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[video_mask_annotation], ) """ @@ -128,11 +129,12 @@ ) dicom_mask_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), annotations=[dicom_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[dicom_mask_annotation], ) dicom_mask_label_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[dicom_mask_annotation], ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index a0897ad9f..fcdf4368b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -26,7 +26,7 @@ ) bbox_labels = [ lb_types.Label( - data=lb_types.DocumentData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[bbox_annotation], ) ] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 349be13a8..7b03a8447 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -5,7 +5,7 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.text import TextData +from 
labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -14,7 +14,7 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), @@ -38,7 +38,7 @@ def test_serialization(): def test_nested_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 4fba5c2ca..6c14343a4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -6,11 +6,10 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.geometry.line import Line from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.geometry.rectangle import Rectangle -from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.video import ( @@ -28,7 +27,7 @@ def test_video(): labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( feature_schema_id="ckrb1sfjx099a0y914hl319ie", @@ -304,7 +303,7 @@ def test_video_name_only(): data = json.load(file) labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( name="question 1", @@ -574,7 +573,7 @@ def test_video_name_only(): def test_video_classification_global_subclassifications(): label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=[ @@ -790,7 +789,7 @@ def test_video_classification_nesting_bbox(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -940,7 +939,7 @@ def test_video_classification_point(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -1108,7 +1107,7 @@ def test_video_classification_frameline(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, From 5f625c8ba890e73e05764916f3a62b022969d913 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:15:43 -0500 Subject: [PATCH 28/44] Fix tests --- .../tests/data/annotation_types/test_label.py | 20 +++++++++++-------- .../test_export_video_streamable.py | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/libs/labelbox/tests/data/annotation_types/test_label.py b/libs/labelbox/tests/data/annotation_types/test_label.py index 5bdfb6bde..8439837ed 100644 --- a/libs/labelbox/tests/data/annotation_types/test_label.py +++ b/libs/labelbox/tests/data/annotation_types/test_label.py @@ -17,7 +17,7 @@ ObjectAnnotation, Point, Line, - ImageData, + MaskData, Label, ) import pytest @@ -26,7 +26,9 @@ def 
test_schema_assignment_geometry(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -51,7 +53,7 @@ def test_schema_assignment_classification(): option_name = "my_option" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ClassificationAnnotation( value=Radio(answer=ClassificationAnswer(name=option_name)), @@ -102,7 +104,7 @@ def test_schema_assignment_subclass(): value=Radio(answer=ClassificationAnswer(name=option_name)), ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -167,7 +169,9 @@ def test_highly_nested(): ], ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -230,7 +234,7 @@ def test_highly_nested(): def test_schema_assignment_confidence(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line( @@ -252,10 +256,10 @@ def test_initialize_label_no_coercion(): value=lb_types.ConversationEntity(start=0, end=8, message_id="4"), ) label = Label( - data=lb_types.ConversationData(global_key=global_key), + data=lb_types.GenericDataRowData(global_key=global_key), annotations=[ner_annotation], ) - assert isinstance(label.data, lb_types.ConversationData) + assert isinstance(label.data, lb_types.GenericDataRowData) assert label.data.global_key == global_key diff --git a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py index 115194a58..28ef6e0cf 100644 --- a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py +++ b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py @@ -4,7 +4,7 @@ import labelbox as lb import labelbox.types as lb_types -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.schema.annotation_import import AnnotationImportState from labelbox.schema.export_task import ExportTask, StreamType @@ -41,7 +41,7 @@ def test_export( for data_row_uid in data_row_uids: labels = [ lb_types.Label( - data=VideoData(uid=data_row_uid), + data=GenericDataRowData(uid=data_row_uid), annotations=bbox_video_annotation_objects, ) ] From 90c1a1950dfd45bceff32b1b39c12c91f07e6a39 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:16:05 -0500 Subject: [PATCH 29/44] Finish PR --- libs/labelbox/src/labelbox/data/annotation_types/label.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index a18460bc1..8ae05f898 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ 
b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -3,9 +3,7 @@ import warnings import labelbox -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from labelbox.schema import ontology from ...annotated_types import Cuid @@ -19,7 +17,7 @@ from .video import VideoObjectAnnotation, VideoMaskAnnotation from .mmc import MessageEvaluationTaskAnnotation from ..ontology import get_feature_schema_lookup -from pydantic import BaseModel, field_validator, model_serializer +from pydantic import BaseModel, field_validator class Label(BaseModel): @@ -43,7 +41,7 @@ class Label(BaseModel): """ uid: Optional[Cuid] = None - data: GenericDataRowData + data: Union[GenericDataRowData, MaskData] annotations: List[ Union[ ClassificationAnnotation, From f91a229bf7d114c407d5a80e3fb24b14714a4d44 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:44:14 -0500 Subject: [PATCH 30/44] Added back in tile data since some of its parameters are required --- .../data/annotation_types/data/tiled_image.py | 294 ++++++++++++++++++ .../data/annotation_types/test_tiled_image.py | 23 ++ 2 files changed, 317 insertions(+) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py index cdb7f4127..adb8db549 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/tiled_image.py @@ -88,6 +88,300 @@ def validate_bounds_lat_lng(self): return self +class TileLayer(BaseModel): + """Url that contains the tile layer. Must be in the format: + + https://c.tile.openstreetmap.org/{z}/{x}/{y}.png + + >>> layer = TileLayer( + url="https://c.tile.openstreetmap.org/{z}/{x}/{y}.png", + name="slippy map tile" + ) + """ + + url: str + name: Optional[str] = "default" + + def asdict(self) -> Dict[str, str]: + return {"tileLayerUrl": self.url, "name": self.name} + + @field_validator("url") + def validate_url(cls, url): + xyz_format = "/{z}/{x}/{y}" + if xyz_format not in url: + raise ValueError(f"{url} needs to contain {xyz_format}") + return url + + +class TiledImageData(BaseData): + """Represents tiled imagery + + If specified version is 2, converts bounds from [lng,lat] to [lat,lng] + + Requires the following args: + tile_layer: TileLayer + tile_bounds: TiledBounds + zoom_levels: List[int] + Optional args: + max_native_zoom: int = None + tile_size: Optional[int] + version: int = 2 + alternative_layers: List[TileLayer] + + >>> tiled_image_data = TiledImageData(tile_layer=TileLayer, + tile_bounds=TiledBounds, + zoom_levels=[1, 12]) + """ + + tile_layer: TileLayer + tile_bounds: TiledBounds + alternative_layers: List[TileLayer] = [] + zoom_levels: Tuple[int, int] + max_native_zoom: Optional[int] = None + tile_size: Optional[int] = DEFAULT_TMS_TILE_SIZE + version: Optional[int] = 2 + multithread: bool = True + + def __post_init__(self) -> None: + if self.max_native_zoom is None: + self.max_native_zoom = self.zoom_levels[0] + + def asdict(self) -> Dict[str, str]: + return { + "tileLayerUrl": self.tile_layer.url, + "bounds": [ + [self.tile_bounds.bounds[0].x, self.tile_bounds.bounds[0].y], + [self.tile_bounds.bounds[1].x, self.tile_bounds.bounds[1].y], + ], + "minZoom": self.zoom_levels[0], + "maxZoom": self.zoom_levels[1], + "maxNativeZoom": self.max_native_zoom, + "epsg": 
self.tile_bounds.epsg.name, + "tileSize": self.tile_size, + "alternativeLayers": [ + layer.asdict() for layer in self.alternative_layers + ], + "version": self.version, + } + + def raster_data( + self, zoom: int = 0, max_tiles: int = 32, multithread=True + ) -> RasterData: + """Converts the tiled image asset into a RasterData object containing an + np.ndarray. + + Uses the minimum zoom provided to render the image. + """ + if self.tile_bounds.epsg == EPSG.SIMPLEPIXEL: + xstart, ystart, xend, yend = self._get_simple_image_params(zoom) + elif self.tile_bounds.epsg == EPSG.EPSG4326: + xstart, ystart, xend, yend = self._get_3857_image_params( + zoom, self.tile_bounds + ) + elif self.tile_bounds.epsg == EPSG.EPSG3857: + # transform to 4326 + transformer = EPSGTransformer.create_geo_to_geo_transformer( + EPSG.EPSG3857, EPSG.EPSG4326 + ) + transforming_bounds = [ + transformer(self.tile_bounds.bounds[0]), + transformer(self.tile_bounds.bounds[1]), + ] + xstart, ystart, xend, yend = self._get_3857_image_params( + zoom, transforming_bounds + ) + else: + raise ValueError(f"Unsupported epsg found: {self.tile_bounds.epsg}") + + self._validate_num_tiles(xstart, ystart, xend, yend, max_tiles) + + rounded_tiles, pixel_offsets = list( + zip( + *[ + self._tile_to_pixel(pt) + for pt in [xstart, ystart, xend, yend] + ] + ) + ) + + image = self._fetch_image_for_bounds(*rounded_tiles, zoom, multithread) + arr = self._crop_to_bounds(image, *pixel_offsets) + return RasterData(arr=arr) + + @property + def value(self) -> np.ndarray: + """Returns the value of a generated RasterData object.""" + return self.raster_data( + self.zoom_levels[0], multithread=self.multithread + ).value + + def _get_simple_image_params( + self, zoom + ) -> Tuple[float, float, float, float]: + """Computes the x and y tile bounds for fetching an image that + captures the entire labeling region (TiledData.bounds) given a specific zoom + + Simple has different order of x / y than lat / lng because of how leaflet behaves + leaflet reports all points as pixel locations at a zoom of 0 + """ + xend, xstart, yend, ystart = ( + self.tile_bounds.bounds[1].x, + self.tile_bounds.bounds[0].x, + self.tile_bounds.bounds[1].y, + self.tile_bounds.bounds[0].y, + ) + return ( + *[ + x * (2 ** (zoom)) / self.tile_size + for x in [xstart, ystart, xend, yend] + ], + ) + + def _get_3857_image_params( + self, zoom: int, bounds: TiledBounds + ) -> Tuple[float, float, float, float]: + """Computes the x and y tile bounds for fetching an image that + captures the entire labeling region (TiledData.bounds) given a specific zoom + """ + lat_start, lat_end = bounds.bounds[1].y, bounds.bounds[0].y + lng_start, lng_end = bounds.bounds[1].x, bounds.bounds[0].x + + # Convert to zoom 0 tile coordinates + xstart, ystart = self._latlng_to_tile(lat_start, lng_start) + xend, yend = self._latlng_to_tile(lat_end, lng_end) + + # Make sure that the tiles are increasing in order + xstart, xend = min(xstart, xend), max(xstart, xend) + ystart, yend = min(ystart, yend), max(ystart, yend) + return (*[pt * 2.0**zoom for pt in [xstart, ystart, xend, yend]],) + + def _latlng_to_tile( + self, lat: float, lng: float, zoom=0 + ) -> Tuple[float, float]: + """Converts lat/lng to 3857 tile coordinates + Formula found here: + https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#lon.2Flat_to_tile_numbers_2 + """ + scale = 2**zoom + lat_rad = math.radians(lat) + x = (lng + 180.0) / 360.0 * scale + y = (1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * scale + return x, y + + def 
_tile_to_pixel(self, tile: float) -> Tuple[int, int]: + """Rounds a tile coordinate and reports the remainder in pixels""" + rounded_tile = int(tile) + remainder = tile - rounded_tile + pixel_offset = int(self.tile_size * remainder) + return rounded_tile, pixel_offset + + def _fetch_image_for_bounds( + self, + x_tile_start: int, + y_tile_start: int, + x_tile_end: int, + y_tile_end: int, + zoom: int, + multithread=True, + ) -> np.ndarray: + """Fetches the tiles and combines them into a single image. + + If a tile cannot be fetched, a padding of expected tile size is instead added. + """ + + if multithread: + tiles = {} + with ThreadPoolExecutor( + max_workers=TILE_DOWNLOAD_CONCURRENCY + ) as exc: + for x in range(x_tile_start, x_tile_end + 1): + for y in range(y_tile_start, y_tile_end + 1): + tiles[(x, y)] = exc.submit(self._fetch_tile, x, y, zoom) + + rows = [] + for y in range(y_tile_start, y_tile_end + 1): + row = [] + for x in range(x_tile_start, x_tile_end + 1): + try: + if multithread: + row.append(tiles[(x, y)].result()) + else: + row.append(self._fetch_tile(x, y, zoom)) + except: + row.append( + np.zeros( + shape=(self.tile_size, self.tile_size, 3), + dtype=np.uint8, + ) + ) + rows.append(np.hstack(row)) + + return np.vstack(rows) + + @retry.Retry(initial=1, maximum=16, multiplier=2) + def _fetch_tile(self, x: int, y: int, z: int) -> np.ndarray: + """ + Fetches the image and returns an np array. + """ + data = requests.get(self.tile_layer.url.format(x=x, y=y, z=z)) + data.raise_for_status() + decoded = np.array(Image.open(BytesIO(data.content)))[..., :3] + if decoded.shape[:2] != (self.tile_size, self.tile_size): + logger.warning(f"Unexpected tile size {decoded.shape}.") + return decoded + + def _crop_to_bounds( + self, + image: np.ndarray, + x_px_start: int, + y_px_start: int, + x_px_end: int, + y_px_end: int, + ) -> np.ndarray: + """This function slices off the excess pixels that are outside of the bounds. + This occurs because only full tiles can be downloaded at a time. + """ + + def invert_point(pt): + # Must have at least 1 pixel for stability. + pt = max(pt, 1) + # All pixel points are relative to a single tile + # So subtracting the tile size inverts the axis + pt = pt - self.tile_size + return pt if pt != 0 else None + + x_px_end, y_px_end = invert_point(x_px_end), invert_point(y_px_end) + return image[y_px_start:y_px_end, x_px_start:x_px_end, :] + + def _validate_num_tiles( + self, + xstart: float, + ystart: float, + xend: float, + yend: float, + max_tiles: int, + ): + """Calculates the number of expected tiles we would fetch. + + If this is greater than the number of max tiles, raise an error. + """ + total_n_tiles = (yend - ystart + 1) * (xend - xstart + 1) + if total_n_tiles > max_tiles: + raise ValueError( + f"Requested zoom results in {total_n_tiles} tiles." + f"Max allowed tiles are {max_tiles}" + f"Increase max tiles or reduce zoom level." + ) + + @field_validator("zoom_levels") + def validate_zoom_levels(cls, zoom_levels): + if zoom_levels[0] > zoom_levels[1]: + raise ValueError( + f"Order of zoom levels should be min, max. Received {zoom_levels}" + ) + return zoom_levels + + class EPSGTransformer(BaseModel): """Transformer class between different EPSG's. Useful when wanting to project in different formats. 
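For reference, the restored class is exercised by test_create_tiled_image_data in the next file of this patch; a minimal usage sketch based only on the fields and validators shown in this hunk (the tile URL below is a placeholder, not a real endpoint) would be:

from labelbox.data.annotation_types.geometry.point import Point
from labelbox.data.annotation_types.data.tiled_image import (
    EPSG,
    TiledBounds,
    TiledImageData,
    TileLayer,
)

# TileLayer.validate_url requires the /{z}/{x}/{y} template in the URL.
layer = TileLayer(
    url="https://example.com/tiles/{z}/{x}/{y}.png",  # hypothetical endpoint
    name="example layer",
)
bounds = TiledBounds(
    epsg=EPSG.EPSG4326,
    bounds=[Point(x=0, y=0), Point(x=5, y=5)],
)
# validate_zoom_levels enforces (min, max) ordering of the zoom tuple.
tiled_data = TiledImageData(
    tile_layer=layer,
    tile_bounds=bounds,
    zoom_levels=(1, 10),
    version=2,
)
assert tiled_data.asdict()["minZoom"] == 1
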
diff --git a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py index 9b96c9445..46f2383d6 100644 --- a/libs/labelbox/tests/data/annotation_types/test_tiled_image.py +++ b/libs/labelbox/tests/data/annotation_types/test_tiled_image.py @@ -6,6 +6,8 @@ from labelbox.data.annotation_types.data.tiled_image import ( EPSG, TiledBounds, + TileLayer, + TiledImageData, EPSGTransformer, ) from pydantic import ValidationError @@ -35,6 +37,27 @@ def test_tiled_bounds_same(epsg): ) +def test_create_tiled_image_data(): + bounds_points = [Point(x=0, y=0), Point(x=5, y=5)] + url = ( + "https://labelbox.s3-us-west-2.amazonaws.com/pathology/{z}/{x}/{y}.png" + ) + zoom_levels = (1, 10) + + tile_layer = TileLayer(url=url, name="slippy map tile") + tile_bounds = TiledBounds(epsg=EPSG.EPSG4326, bounds=bounds_points) + tiled_image_data = TiledImageData( + tile_layer=tile_layer, + tile_bounds=tile_bounds, + zoom_levels=zoom_levels, + version=2, + ) + assert isinstance(tiled_image_data, TiledImageData) + assert tiled_image_data.tile_bounds.bounds == bounds_points + assert tiled_image_data.tile_layer.url == url + assert tiled_image_data.zoom_levels == zoom_levels + + def test_epsg_point_projections(): zoom = 4 From fdabc94b4345f13fb454db1420229f99adc9cb40 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:51:42 -0500 Subject: [PATCH 31/44] Added tile back to __init__.py --- libs/labelbox/src/labelbox/data/annotation_types/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py index 84d6d65a5..1a78127e1 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py @@ -48,6 +48,8 @@ from .data.tiled_image import EPSG from .data.tiled_image import EPSGTransformer from .data.tiled_image import TiledBounds +from .data.tiled_image import TiledImageData +from .data.tiled_image import TileLayer from .llm_prompt_response.prompt import PromptText from .llm_prompt_response.prompt import PromptClassificationAnnotation From 5bb3c97b59cd87f776a9b8657584d9e4f9944350 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 12:50:08 -0500 Subject: [PATCH 32/44] Fixed import --- libs/labelbox/src/labelbox/data/annotation_types/data/raster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py index 3a4e8bb6e..fc9acd50f 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py @@ -12,7 +12,6 @@ from typing_extensions import Literal from pydantic import BaseModel, model_validator, ConfigDict -from labelbox.exceptions import InternalServerError from ..types import TypedArray From 4888346a129f3b172bca5dde6f2ff548e7002760 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:50:16 -0500 Subject: [PATCH 33/44] Removed some data tests --- .../data/annotation_types/data/test_raster.py | 16 +--- .../data/annotation_types/data/test_text.py | 55 -------------- .../data/annotation_types/data/test_video.py | 73 ------------------- 3 files changed, 4 insertions(+), 140 deletions(-) delete 
mode 100644 libs/labelbox/tests/data/annotation_types/data/test_text.py delete mode 100644 libs/labelbox/tests/data/annotation_types/data/test_video.py diff --git a/libs/labelbox/tests/data/annotation_types/data/test_raster.py b/libs/labelbox/tests/data/annotation_types/data/test_raster.py index 304ed3e95..209419aed 100644 --- a/libs/labelbox/tests/data/annotation_types/data/test_raster.py +++ b/libs/labelbox/tests/data/annotation_types/data/test_raster.py @@ -11,7 +11,7 @@ def test_validate_schema(): with pytest.raises(ValidationError): - GenericDataRowData() + MaskData() def test_im_bytes(): @@ -24,15 +24,9 @@ def test_im_bytes(): def test_im_url(): - raster_data = GenericDataRowData(url="https://picsum.photos/id/829/200/300") - data_ = raster_data.value - assert data_.shape == (300, 200, 3) - - -def test_im_path(): - img_path = "/tmp/img.jpg" - urllib.request.urlretrieve("https://picsum.photos/id/829/200/300", img_path) - raster_data = GenericDataRowData(file_path=img_path) + raster_data = MaskData( + uid="test", url="https://picsum.photos/id/829/200/300" + ) data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -43,12 +37,10 @@ def test_ref(): metadata = [] media_attributes = {} data = GenericDataRowData( - external_id=external_id, uid=uid, metadata=metadata, media_attributes=media_attributes, ) - assert data.external_id == external_id assert data.uid == uid assert data.media_attributes == media_attributes assert data.metadata == metadata diff --git a/libs/labelbox/tests/data/annotation_types/data/test_text.py b/libs/labelbox/tests/data/annotation_types/data/test_text.py deleted file mode 100644 index 865f93e65..000000000 --- a/libs/labelbox/tests/data/annotation_types/data/test_text.py +++ /dev/null @@ -1,55 +0,0 @@ -import os - -import pytest - -from labelbox.data.annotation_types import TextData -from pydantic import ValidationError - - -def test_validate_schema(): - with pytest.raises(ValidationError): - data = TextData() - - -def test_text(): - text = "hello world" - metadata = [] - media_attributes = {} - text_data = TextData( - text=text, metadata=metadata, media_attributes=media_attributes - ) - assert text_data.text == text - - -def test_url(): - url = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/sample3.txt" - text_data = TextData(url=url) - text = text_data.value - assert len(text) == 3541 - - -def test_file(tmpdir): - content = "foo bar baz" - file = "hello.txt" - dir = tmpdir.mkdir("data") - dir.join(file).write(content) - text_data = TextData(file_path=os.path.join(dir.strpath, file)) - assert len(text_data.value) == len(content) - - -def test_ref(): - external_id = "external_id" - uid = "uid" - metadata = [] - media_attributes = {} - data = TextData( - text="hello world", - external_id=external_id, - uid=uid, - metadata=metadata, - media_attributes=media_attributes, - ) - assert data.external_id == external_id - assert data.uid == uid - assert data.media_attributes == media_attributes - assert data.metadata == metadata diff --git a/libs/labelbox/tests/data/annotation_types/data/test_video.py b/libs/labelbox/tests/data/annotation_types/data/test_video.py deleted file mode 100644 index 5fd77c2c8..000000000 --- a/libs/labelbox/tests/data/annotation_types/data/test_video.py +++ /dev/null @@ -1,73 +0,0 @@ -import numpy as np -import pytest - -from labelbox.data.annotation_types import VideoData -from pydantic import ValidationError - - -def test_validate_schema(): - with pytest.raises(ValidationError): - data = VideoData() - - 
-def test_frames(): - data = { - x: (np.random.random((32, 32, 3)) * 255).astype(np.uint8) - for x in range(5) - } - video_data = VideoData(frames=data) - for idx, frame in video_data.frame_generator(): - assert idx in data - assert np.all(frame == data[idx]) - - -def test_file_path(): - path = "tests/integration/media/cat.mp4" - raster_data = VideoData(file_path=path) - - with pytest.raises(ValueError): - raster_data[0] - - raster_data.load_frames() - raster_data[0] - - frame_indices = list(raster_data.frames.keys()) - # 29 frames - assert set(frame_indices) == set(list(range(28))) - - -def test_file_url(): - url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerMeltdowns.mp4" - raster_data = VideoData(url=url) - - with pytest.raises(ValueError): - raster_data[0] - - raster_data.load_frames() - raster_data[0] - - frame_indices = list(raster_data.frames.keys()) - # 362 frames - assert set(frame_indices) == set(list(range(361))) - - -def test_ref(): - external_id = "external_id" - uid = "uid" - data = { - x: (np.random.random((32, 32, 3)) * 255).astype(np.uint8) - for x in range(5) - } - metadata = [] - media_attributes = {} - data = VideoData( - frames=data, - external_id=external_id, - uid=uid, - metadata=metadata, - media_attributes=media_attributes, - ) - assert data.external_id == external_id - assert data.uid == uid - assert data.media_attributes == media_attributes - assert data.metadata == metadata From c32bd715c292537d607be6d3eb3eb555cba71cfe Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:51:28 -0500 Subject: [PATCH 34/44] Removed videoData --- .../data/serialization/ndjson/test_export_video_objects.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 1ab678cde..a0cd13e81 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -1,13 +1,13 @@ from labelbox.data.annotation_types import Label, VideoObjectAnnotation from labelbox.data.serialization.ndjson.converter import NDJsonConverter from labelbox.data.annotation_types.geometry import Rectangle, Point -from labelbox.data.annotation_types import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData def video_bbox_label(): return Label( uid="cl1z52xwh00050fhcmfgczqvn", - data=VideoData( + data=GenericDataRowData( uid="cklr9mr4m5iao0rb6cvxu4qbn", url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), From 1c50842070900615b2cebbf4124d0859d476ffce Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 23 Sep 2024 11:53:40 -0700 Subject: [PATCH 35/44] Fix exception type for labeling service test (#1835) --- libs/labelbox/src/labelbox/client.py | 41 +++++++++---------- .../integration/test_labeling_service.py | 9 ++-- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index b0b5a1407..055bee676 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -9,10 +9,9 @@ import urllib.parse from collections import defaultdict from datetime import datetime, timezone -from 
typing import Any, List, Dict, Union, Optional, overload, Callable from types import MappingProxyType +from typing import Any, Dict, List, Optional, Union, overload -from labelbox.schema.search_filters import SearchFilter import requests import requests.exceptions from google.api_core import retry @@ -26,20 +25,18 @@ from labelbox.orm.model import Entity, Field from labelbox.pagination import PaginatedCollection from labelbox.schema import role -from labelbox.schema.conflict_resolution_strategy import ( - ConflictResolutionStrategy, -) -from labelbox.schema.data_row import DataRow from labelbox.schema.catalog import Catalog +from labelbox.schema.data_row import DataRow from labelbox.schema.data_row_metadata import DataRowMetadataOntology from labelbox.schema.dataset import Dataset from labelbox.schema.embedding import Embedding from labelbox.schema.enums import CollectionJobStatus from labelbox.schema.foundry.foundry_client import FoundryClient from labelbox.schema.iam_integration import IAMIntegration -from labelbox.schema.identifiables import DataRowIds -from labelbox.schema.identifiables import GlobalKeys +from labelbox.schema.identifiables import DataRowIds, GlobalKeys +from labelbox.schema.label_score import LabelScore from labelbox.schema.labeling_frontend import LabelingFrontend +from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard from labelbox.schema.media_type import ( MediaType, get_media_type_validation_error, @@ -47,40 +44,40 @@ from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig from labelbox.schema.model_run import ModelRun -from labelbox.schema.ontology import Ontology, DeleteFeatureFromOntologyResult from labelbox.schema.ontology import ( - Tool, Classification, + DeleteFeatureFromOntologyResult, FeatureSchema, + Ontology, PromptResponseClassification, + Tool, +) +from labelbox.schema.ontology_kind import ( + EditorTaskType, + EditorTaskTypeMapper, + OntologyKind, ) from labelbox.schema.organization import Organization from labelbox.schema.project import Project from labelbox.schema.quality_mode import ( - QualityMode, BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS, BENCHMARK_AUTO_AUDIT_PERCENTAGE, CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS, CONSENSUS_AUTO_AUDIT_PERCENTAGE, + QualityMode, ) from labelbox.schema.queue_mode import QueueMode from labelbox.schema.role import Role +from labelbox.schema.search_filters import SearchFilter from labelbox.schema.send_to_annotate_params import ( SendToAnnotateFromCatalogParams, + build_annotations_input, build_destination_task_queue_input, build_predictions_input, - build_annotations_input, ) from labelbox.schema.slice import CatalogSlice, ModelSlice -from labelbox.schema.task import Task, DataUpsertTask +from labelbox.schema.task import DataUpsertTask, Task from labelbox.schema.user import User -from labelbox.schema.label_score import LabelScore -from labelbox.schema.ontology_kind import ( - OntologyKind, - EditorTaskTypeMapper, - EditorTaskType, -) -from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard logger = logging.getLogger(__name__) @@ -540,7 +537,7 @@ def upload_data( error_msg = next(iter(errors), {}).get( "message", "Unknown error" ) - except Exception as e: + except Exception: error_msg = "Unknown error" raise labelbox.exceptions.LabelboxError( "Failed to upload, message: %s" % error_msg @@ -842,7 +839,7 @@ def create_dataset( if not validation_result["validateDataset"]["valid"]: raise labelbox.exceptions.LabelboxError( - f"IAMIntegration was 
not successfully added to the dataset." + "IAMIntegration was not successfully added to the dataset." ) except Exception as e: dataset.delete() diff --git a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py index 09b5c24a1..04a1cb507 100644 --- a/libs/labelbox/tests/integration/test_labeling_service.py +++ b/libs/labelbox/tests/integration/test_labeling_service.py @@ -1,6 +1,9 @@ import pytest -from labelbox.exceptions import LabelboxError, ResourceNotFoundError +from labelbox.exceptions import ( + MalformedQueryException, + ResourceNotFoundError, +) from labelbox.schema.labeling_service import LabelingServiceStatus @@ -51,7 +54,7 @@ def test_request_labeling_service_moe_project( labeling_service = project.get_labeling_service() with pytest.raises( - LabelboxError, + MalformedQueryException, match='[{"errorType":"PROJECT_MODEL_CONFIG","errorMessage":"Project model config is not completed"}]', ): labeling_service.request() @@ -73,5 +76,5 @@ def test_request_labeling_service_incomplete_requirements(ontology, project): ): # No labeling service by default labeling_service.request() project.connect_ontology(ontology) - with pytest.raises(LabelboxError): + with pytest.raises(MalformedQueryException): labeling_service.request() From 3c7e72c28fa03d677d2bb2202de7489dd7d33930 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:02:19 -0500 Subject: [PATCH 36/44] Removed some warnings --- .../src/labelbox/data/annotation_types/types.py | 11 +++++++---- .../labelbox/src/labelbox/schema/data_row_metadata.py | 9 ++++++++- libs/labelbox/src/labelbox/schema/search_filters.py | 5 ++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/types.py b/libs/labelbox/src/labelbox/data/annotation_types/types.py index 9bb86a4b9..adcadf306 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/types.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/types.py @@ -5,7 +5,8 @@ from packaging import version import numpy as np -from pydantic import StringConstraints, Field +from pydantic import StringConstraints, Field, ConfigDict +from pydantic_core import core_schema DType = TypeVar("DType") DShape = TypeVar("DShape") @@ -13,11 +14,13 @@ class _TypedArray(np.ndarray, Generic[DType, DShape]): @classmethod - def __get_validators__(cls): - yield cls.validate + def __get_pydantic_core_schema__( + cls, _source_type: type, _model: type + ) -> core_schema.CoreSchema: + return core_schema.no_info_plain_validator_function(cls.validate) @classmethod - def validate(cls, val, field: Field): + def validate(cls, val): if not isinstance(val, np.ndarray): raise TypeError(f"Expected numpy array. 
Found {type(val)}") return val diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py index d6c50b975..f67b3d269 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py +++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py @@ -27,6 +27,7 @@ conlist, ConfigDict, model_serializer, + AfterValidator, ) from labelbox.schema.ontology import SchemaId @@ -36,6 +37,12 @@ format_iso_from_string, ) +Name = Annotated[ + str, + AfterValidator(lambda x: str.strip(str(x))), + Field(min_length=1, max_length=100), +] + class DataRowMetadataKind(Enum): number = "CustomMetadataNumber" @@ -49,7 +56,7 @@ class DataRowMetadataKind(Enum): # Metadata schema class DataRowMetadataSchema(BaseModel): uid: SchemaId - name: str = Field(strip_whitespace=True, min_length=1, max_length=100) + name: Name reserved: bool kind: DataRowMetadataKind options: Optional[List["DataRowMetadataSchema"]] = None diff --git a/libs/labelbox/src/labelbox/schema/search_filters.py b/libs/labelbox/src/labelbox/schema/search_filters.py index 13b158678..e61e29ad8 100644 --- a/libs/labelbox/src/labelbox/schema/search_filters.py +++ b/libs/labelbox/src/labelbox/schema/search_filters.py @@ -5,7 +5,7 @@ from typing_extensions import Annotated -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, field_validator, ConfigDict from labelbox.schema.labeling_service_status import LabelingServiceStatus from labelbox.utils import format_iso_datetime @@ -15,8 +15,7 @@ class BaseSearchFilter(BaseModel): Shared code for all search filters """ - class Config: - use_enum_values = True + model_config = ConfigDict(use_enum_values=True) class OperationTypeEnum(Enum): From 3dd9911346ed45c32701e8ec61c9d6d0b7cc32f9 Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:08:46 -0500 Subject: [PATCH 37/44] Fix merge conflicts --- libs/labelbox/src/labelbox/client.py | 2450 -------------------------- 1 file changed, 2450 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/client.py diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py deleted file mode 100644 index 77fa5d6a4..000000000 --- a/libs/labelbox/src/labelbox/client.py +++ /dev/null @@ -1,2450 +0,0 @@ -# type: ignore -import json -import logging -import mimetypes -import os -import random -import time -import urllib.parse -from collections import defaultdict -from datetime import datetime, timezone -from types import MappingProxyType -from typing import Any, Dict, List, Optional, Union, overload - -import requests -import requests.exceptions -from google.api_core import retry -from lbox.request_client import RequestClient - -from labelbox import __version__ as SDK_VERSION -from labelbox import utils -from labelbox.adv_client import AdvClient -from labelbox.orm import query -from labelbox.orm.db_object import DbObject -from labelbox.orm.model import Entity, Field -from labelbox.pagination import PaginatedCollection -from labelbox.schema import role -from labelbox.schema.catalog import Catalog -from labelbox.schema.data_row import DataRow -from labelbox.schema.data_row_metadata import DataRowMetadataOntology -from labelbox.schema.dataset import Dataset -from labelbox.schema.embedding import Embedding -from labelbox.schema.enums import CollectionJobStatus -from labelbox.schema.foundry.foundry_client import FoundryClient -from labelbox.schema.iam_integration import IAMIntegration -from 
labelbox.schema.identifiables import DataRowIds, GlobalKeys -from labelbox.schema.label_score import LabelScore -from labelbox.schema.labeling_frontend import LabelingFrontend -from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -from labelbox.schema.media_type import ( - MediaType, - get_media_type_validation_error, -) -from labelbox.schema.model import Model -from labelbox.schema.model_config import ModelConfig -from labelbox.schema.model_run import ModelRun -from labelbox.schema.ontology import ( - Classification, - DeleteFeatureFromOntologyResult, - FeatureSchema, - Ontology, - PromptResponseClassification, - Tool, -) -from labelbox.schema.ontology_kind import ( - EditorTaskType, - EditorTaskTypeMapper, - OntologyKind, -) -from labelbox.schema.organization import Organization -from labelbox.schema.project import Project -from labelbox.schema.quality_mode import ( - BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS, - BENCHMARK_AUTO_AUDIT_PERCENTAGE, - CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS, - CONSENSUS_AUTO_AUDIT_PERCENTAGE, - QualityMode, -) -from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.role import Role -from labelbox.schema.search_filters import SearchFilter -from labelbox.schema.send_to_annotate_params import ( - SendToAnnotateFromCatalogParams, - build_annotations_input, - build_destination_task_queue_input, - build_predictions_input, -) -from labelbox.schema.slice import CatalogSlice, ModelSlice -from labelbox.schema.task import DataUpsertTask, Task -from labelbox.schema.user import User - -logger = logging.getLogger(__name__) - - -class Client: - """A Labelbox client. - - Provides functions for querying and creating - top-level data objects (Projects, Datasets). - """ - - def __init__( - self, - api_key=None, - endpoint="https://api.labelbox.com/graphql", - enable_experimental=False, - app_url="https://app.labelbox.com", - rest_endpoint="https://api.labelbox.com/api/v1", - ): - """Creates and initializes a Labelbox Client. - - Logging is defaulted to level WARNING. To receive more verbose - output to console, update `logging.level` to the appropriate level. - - >>> logging.basicConfig(level = logging.INFO) - >>> client = Client("") - - Args: - api_key (str): API key. If None, the key is obtained from the "LABELBOX_API_KEY" environment variable. - endpoint (str): URL of the Labelbox server to connect to. - enable_experimental (bool): Indicates whether or not to use experimental features - app_url (str) : host url for all links to the web app - Raises: - lbox.exceptions.AuthenticationError: If no `api_key` - is provided as an argument or via the environment - variable. 
-        """
-        self._data_row_metadata_ontology = None
-        self._request_client = RequestClient(
-            sdk_version=SDK_VERSION,
-            api_key=api_key,
-            endpoint=endpoint,
-            enable_experimental=enable_experimental,
-            app_url=app_url,
-            rest_endpoint=rest_endpoint,
-        )
-        self._adv_client = AdvClient.factory(rest_endpoint, api_key)
-
-    @property
-    def headers(self) -> MappingProxyType:
-        return self._request_client.headers
-
-    @property
-    def connection(self) -> requests.Session:
-        return self._request_client._connection
-
-    @property
-    def endpoint(self) -> str:
-        return self._request_client.endpoint
-
-    @property
-    def rest_endpoint(self) -> str:
-        return self._request_client.rest_endpoint
-
-    @property
-    def enable_experimental(self) -> bool:
-        return self._request_client.enable_experimental
-
-    @property
-    def app_url(self) -> str:
-        return self._request_client.app_url
-
-    def execute(
-        self,
-        query=None,
-        params=None,
-        data=None,
-        files=None,
-        timeout=60.0,
-        experimental=False,
-        error_log_key="message",
-        raise_return_resource_not_found=False,
-        error_handlers: Optional[
-            Dict[str, Callable[[requests.models.Response], None]]
-        ] = None,
-    ) -> Dict[str, Any]:
-        """Executes a GraphQL query.
-
-        Args:
-            query (str): The query to execute.
-            params (dict): Variables to pass with the query.
-            raise_return_resource_not_found (bool): If True, raise a
-                ResourceNotFoundError if the query returns None.
-            error_handlers (dict): A dictionary mapping a GraphQL error code to a handler function.
-                Allows a caller to handle specific errors in a custom way or produce more user-friendly messages.
-
-        Returns:
-            dict: The response from the server.
-        """
-        return self._request_client.execute(
-            query,
-            params,
-            data=data,
-            files=files,
-            timeout=timeout,
-            experimental=experimental,
-            error_log_key=error_log_key,
-            raise_return_resource_not_found=raise_return_resource_not_found,
-            error_handlers=error_handlers,
-        )
-
-    def upload_file(self, path: str) -> str:
-        """Uploads the file at the given local path.
-
-        Also includes a best guess at the content type of the file.
-
-        Args:
-            path (str): path to the local file to be uploaded.
-        Returns:
-            str, the URL of uploaded data.
-        Raises:
-            lbox.exceptions.LabelboxError: If upload failed.
-        """
-        content_type, _ = mimetypes.guess_type(path)
-        filename = os.path.basename(path)
-        with open(path, "rb") as f:
-            return self.upload_data(
-                content=f.read(), filename=filename, content_type=content_type
-            )
-
-    @retry.Retry(
-        predicate=retry.if_exception_type(lbox.exceptions.InternalServerError)
-    )
-    def upload_data(
-        self,
-        content: bytes,
-        filename: str = None,
-        content_type: str = None,
-        sign: bool = False,
-    ) -> str:
-        """Uploads the given data (bytes) to Labelbox.
-
-        Args:
-            content: bytestring to upload
-            filename: name of the upload
-            content_type: content type of data uploaded
-            sign: whether or not to sign the url
-
-        Returns:
-            str, the URL of uploaded data.
-
-        Raises:
-            lbox.exceptions.LabelboxError: If upload failed.
- """ - - request_data = { - "operations": json.dumps( - { - "variables": { - "file": None, - "contentLength": len(content), - "sign": sign, - }, - "query": """mutation UploadFile($file: Upload!, $contentLength: Int!, - $sign: Boolean) { - uploadFile(file: $file, contentLength: $contentLength, - sign: $sign) {url filename} } """, - } - ), - "map": (None, json.dumps({"1": ["variables.file"]})), - } - - files = { - "1": (filename, content, content_type) - if (filename and content_type) - else content - } - headers = self.connection.headers.copy() - headers.pop("Content-Type", None) - request = requests.Request( - "POST", - self.endpoint, - headers=headers, - data=request_data, - files=files, - ) - - prepped: requests.PreparedRequest = request.prepare() - - response = self.connection.send(prepped) - - if response.status_code == 502: - error_502 = "502 Bad Gateway" - raise lbox.exceptions.InternalServerError(error_502) - elif response.status_code == 503: - raise lbox.exceptions.InternalServerError(response.text) - elif response.status_code == 520: - raise lbox.exceptions.InternalServerError(response.text) - - try: - file_data = response.json().get("data", None) - except ValueError as e: # response is not valid JSON - raise lbox.exceptions.LabelboxError( - "Failed to upload, unknown cause", e - ) - - if not file_data or not file_data.get("uploadFile", None): - try: - errors = response.json().get("errors", []) - error_msg = next(iter(errors), {}).get( - "message", "Unknown error" - ) - except Exception: - error_msg = "Unknown error" - raise lbox.exceptions.LabelboxError( - "Failed to upload, message: %s" % error_msg - ) - - return file_data["uploadFile"]["url"] - - def _get_single(self, db_object_type, uid): - """Fetches a single object of the given type, for the given ID. - - Args: - db_object_type (type): DbObject subclass. - uid (str): Unique ID of the row. - Returns: - Object of `db_object_type`. - Raises: - lbox.exceptions.ResourceNotFoundError: If there is no object - of the given type for the given ID. - """ - query_str, params = query.get_single(db_object_type, uid) - - res = self.execute(query_str, params) - res = res and res.get(utils.camel_case(db_object_type.type_name())) - if res is None: - raise lbox.exceptions.ResourceNotFoundError(db_object_type, params) - else: - return db_object_type(self, res) - - def get_project(self, project_id) -> Project: - """Gets a single Project with the given ID. - - >>> project = client.get_project("") - - Args: - project_id (str): Unique ID of the Project. - Returns: - The sought Project. - Raises: - lbox.exceptions.ResourceNotFoundError: If there is no - Project with the given ID. - """ - return self._get_single(Entity.Project, project_id) - - def get_dataset(self, dataset_id) -> Dataset: - """Gets a single Dataset with the given ID. - - >>> dataset = client.get_dataset("") - - Args: - dataset_id (str): Unique ID of the Dataset. - Returns: - The sought Dataset. - Raises: - lbox.exceptions.ResourceNotFoundError: If there is no - Dataset with the given ID. - """ - return self._get_single(Entity.Dataset, dataset_id) - - def get_user(self) -> User: - """Gets the current User database object. - - >>> user = client.get_user() - """ - return self._get_single(Entity.User, None) - - def get_organization(self) -> Organization: - """Gets the Organization DB object of the current user. 
- - >>> organization = client.get_organization() - """ - return self._get_single(Entity.Organization, None) - - def _get_all(self, db_object_type, where, filter_deleted=True): - """Fetches all the objects of the given type the user has access to. - - Args: - db_object_type (type): DbObject subclass. - where (Comparison, LogicalOperation or None): The `where` clause - for filtering. - Returns: - An iterable of `db_object_type` instances. - """ - if filter_deleted: - not_deleted = db_object_type.deleted == False # noqa: E712 Needed for bit operator to combine comparisons - where = not_deleted if where is None else where & not_deleted - query_str, params = query.get_all(db_object_type, where) - - return PaginatedCollection( - self, - query_str, - params, - [utils.camel_case(db_object_type.type_name()) + "s"], - db_object_type, - ) - - def get_projects(self, where=None) -> PaginatedCollection: - """Fetches all the projects the user has access to. - - >>> projects = client.get_projects(where=(Project.name == "") & (Project.description == "")) - - Args: - where (Comparison, LogicalOperation or None): The `where` clause - for filtering. - Returns: - PaginatedCollection of all projects the user has access to or projects matching the criteria specified. - """ - return self._get_all(Entity.Project, where) - - def get_users(self, where=None) -> PaginatedCollection: - """Fetches all the users. - - >>> users = client.get_users(where=User.email == "") - - Args: - where (Comparison, LogicalOperation or None): The `where` clause - for filtering. - Returns: - An iterable of Users (typically a PaginatedCollection). - """ - return self._get_all(Entity.User, where, filter_deleted=False) - - def get_datasets(self, where=None) -> PaginatedCollection: - """Fetches one or more datasets. - - >>> datasets = client.get_datasets(where=(Dataset.name == "") & (Dataset.description == "")) - - Args: - where (Comparison, LogicalOperation or None): The `where` clause - for filtering. - Returns: - PaginatedCollection of all datasets the user has access to or datasets matching the criteria specified. - """ - return self._get_all(Entity.Dataset, where) - - def get_labeling_frontends(self, where=None) -> List[LabelingFrontend]: - """Fetches all the labeling frontends. - - >>> frontend = client.get_labeling_frontends(where=LabelingFrontend.name == "Editor") - - Args: - where (Comparison, LogicalOperation or None): The `where` clause - for filtering. - Returns: - An iterable of LabelingFrontends (typically a PaginatedCollection). - """ - return self._get_all(Entity.LabelingFrontend, where) - - def _create(self, db_object_type, data, extra_params={}): - """Creates an object on the server. Attribute values are - passed as keyword arguments: - - Args: - db_object_type (type): A DbObjectType subtype. - data (dict): Keys are attributes or their names (in Python, - snake-case convention) and values are desired attribute values. - extra_params (dict): Additional parameters to pass to GraphQL. - These have to be Field(...): value pairs. - Returns: - A new object of the given DB object type. - Raises: - InvalidAttributeError: If the DB object type does not contain - any of the attribute names given in `data`. - """ - # Convert string attribute names to Field or Relationship objects. - # Also convert Labelbox object values to their UIDs. 
-        data = {
-            db_object_type.attribute(attr)
-            if isinstance(attr, str)
-            else attr: value.uid if isinstance(value, DbObject) else value
-            for attr, value in data.items()
-        }
-
-        data = {**data, **extra_params}
-        query_string, params = query.create(db_object_type, data)
-        res = self.execute(
-            query_string, params, raise_return_resource_not_found=True
-        )
-
-        if not res:
-            raise lbox.exceptions.LabelboxError(
-                "Failed to create %s" % db_object_type.type_name()
-            )
-        res = res["create%s" % db_object_type.type_name()]
-
-        return db_object_type(self, res)
-
-    def create_model_config(
-        self, name: str, model_id: str, inference_params: dict
-    ) -> ModelConfig:
-        """Creates a new model config with the given params.
-            Model configs are scoped to organizations, and can be reused between projects.
-
-        Args:
-            name (str): Name of the model config
-            model_id (str): ID of model to configure
-            inference_params (dict): JSON of model configuration parameters.
-
-        Returns:
-            ModelConfig, the created model config
-        """
-        if not name:
-            raise ValueError("Model config name must not be an empty string.")
-
-        query = """mutation CreateModelConfigPyApi($modelId: ID!, $inferenceParams: Json!, $name: String!) {
-            createModelConfig(input: {modelId: $modelId, inferenceParams: $inferenceParams, name: $name}) {
-                modelId
-                inferenceParams
-                id
-                name
-            }
-        }"""
-        params = {
-            "modelId": model_id,
-            "inferenceParams": inference_params,
-            "name": name,
-        }
-        result = self.execute(query, params)
-        return ModelConfig(self, result["createModelConfig"])
-
-    def delete_model_config(self, id: str) -> bool:
-        """Deletes an existing model config with the given id
-
-        Args:
-            id (str): ID of existing model config
-
-        Returns:
-            bool, indicates if the operation was a success.
-        """
-
-        query = """mutation DeleteModelConfigPyApi($id: ID!) {
-            deleteModelConfig(input: {id: $id}) {
-                success
-            }
-        }"""
-        params = {"id": id}
-        result = self.execute(query, params)
-        if not result:
-            raise lbox.exceptions.ResourceNotFoundError(
-                Entity.ModelConfig, params
-            )
-        return result["deleteModelConfig"]["success"]
-
-    def create_dataset(
-        self, iam_integration=IAMIntegration._DEFAULT, **kwargs
-    ) -> Dataset:
-        """Creates a Dataset object on the server.
-
-        Attribute values are passed as keyword arguments.
-
-        Args:
-            iam_integration (IAMIntegration) : Uses the default integration.
-                Optionally specify another integration or set as None to not use delegated access
-            **kwargs: Keyword arguments with Dataset attribute values.
-        Returns:
-            A new Dataset object.
-        Raises:
-            InvalidAttributeError: If the Dataset type does not contain
-                any of the attribute names given in kwargs.
-        Examples:
-            Create a dataset
-            >>> dataset = client.create_dataset(name="<dataset_name>")
-            Create a dataset with description
-            >>> dataset = client.create_dataset(name="<dataset_name>", description="<dataset_description>")
-        """
-        dataset = self._create(Entity.Dataset, kwargs)
-        if iam_integration == IAMIntegration._DEFAULT:
-            iam_integration = (
-                self.get_organization().get_default_iam_integration()
-            )
-
-        if iam_integration is None:
-            return dataset
-
-        try:
-            if not isinstance(iam_integration, IAMIntegration):
-                raise TypeError(
-                    f"iam integration must be a reference to an `IAMIntegration` object. Found {type(iam_integration)}"
-                )
-
-            if not iam_integration.valid:
-                raise ValueError(
-                    "Integration is not valid. Please select another."
-                )
-
-            self.execute(
-                """mutation setSignerForDatasetPyApi($signerId: ID!, $datasetId: ID!) 
{
-                setSignerForDataset(data: { signerId: $signerId}, where: {id: $datasetId}){id}}
-                """,
-                {"signerId": iam_integration.uid, "datasetId": dataset.uid},
-            )
-            validation_result = self.execute(
-                """mutation validateDatasetPyApi($id: ID!){validateDataset(where: {id : $id}){
-                    valid checks{name, success}}}
-                """,
-                {"id": dataset.uid},
-            )
-
-            if not validation_result["validateDataset"]["valid"]:
-                raise lbox.exceptions.LabelboxError(
-                    "IAMIntegration was not successfully added to the dataset."
-                )
-        except Exception as e:
-            dataset.delete()
-            raise e
-        return dataset
-
-    def create_project(self, **kwargs) -> Project:
-        """Creates a Project object on the server.
-
-        Attribute values are passed as keyword arguments.
-
-        >>> project = client.create_project(
-                name="<project_name>",
-                description="<project_description>",
-                media_type=MediaType.Image,
-                queue_mode=QueueMode.Batch
-            )
-
-        Args:
-            name (str): A name for the project
-            description (str): A short summary for the project
-            media_type (MediaType): The type of assets that this project will accept
-            queue_mode (Optional[QueueMode]): The queue mode to use
-            quality_mode (Optional[QualityMode]): The quality mode to use (e.g. Benchmark, Consensus). Defaults to
-                Benchmark
-            quality_modes (Optional[List[QualityMode]]): The quality modes to use (e.g. Benchmark, Consensus). Defaults to
-                Benchmark.
-        Returns:
-            A new Project object.
-        Raises:
-            InvalidAttributeError: If the Project type does not contain
-                any of the attribute names given in kwargs.
-
-        NOTE: the following attributes are used only in chat model evaluation projects:
-        dataset_name_or_id, append_to_existing_dataset, data_row_count, editor_task_type
-        They are not used for general projects and not supported in this method
-        """
-        # The following arguments are not supported for general projects, only for chat model evaluation projects
-        kwargs.pop("dataset_name_or_id", None)
-        kwargs.pop("append_to_existing_dataset", None)
-        kwargs.pop("data_row_count", None)
-        kwargs.pop("editor_task_type", None)
-        return self._create_project(**kwargs)
-
-    @overload
-    def create_model_evaluation_project(
-        self,
-        dataset_name: str,
-        dataset_id: str = None,
-        data_row_count: int = 100,
-        **kwargs,
-    ) -> Project:
-        pass
-
-    @overload
-    def create_model_evaluation_project(
-        self,
-        dataset_id: str,
-        dataset_name: str = None,
-        data_row_count: int = 100,
-        **kwargs,
-    ) -> Project:
-        pass
-
-    def create_model_evaluation_project(
-        self,
-        dataset_id: Optional[str] = None,
-        dataset_name: Optional[str] = None,
-        data_row_count: int = 100,
-        **kwargs,
-    ) -> Project:
-        """
-        Use this method exclusively to create a chat model evaluation project.
-        Args:
-            dataset_name: When creating a new dataset, pass the name
-            dataset_id: When using an existing dataset, pass the id
-            data_row_count: The number of data row assets to use for the project
-            **kwargs: Additional parameters to pass to the create_project method
-        Returns:
-            Project: The created project
-
-        Examples:
-            >>> client.create_model_evaluation_project(name=project_name, dataset_name="new data set")
-            >>> This creates a new dataset with a default number of rows (100), creates a new project and assigns a batch of the newly created data rows to the project.
-
-            >>> client.create_model_evaluation_project(name=project_name, dataset_name="new data set", data_row_count=10)
-            >>> This creates a new dataset with 10 data rows, creates a new project and assigns a batch of the newly created data rows to the project.
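
For orientation, a minimal usage sketch of the two call shapes documented above (hedged: `client` is assumed to be configured, and all names and IDs are placeholders):

    from labelbox import Client

    client = Client(api_key="<LABELBOX_API_KEY>")
    # Pass dataset_name to create a fresh dataset with data_row_count rows.
    project = client.create_model_evaluation_project(
        name="chat-eval", dataset_name="chat-eval-rows", data_row_count=10
    )
    # Or pass dataset_id to append rows to an existing dataset instead.
    project = client.create_model_evaluation_project(
        name="chat-eval-2", dataset_id="<dataset_id>"
    )
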
-
-            >>> client.create_model_evaluation_project(name=project_name, dataset_id="clr00u8j0j0j0")
-            >>> This creates a new project, and adds 100 data rows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created data rows to the project.
-
-            >>> client.create_model_evaluation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10)
-            >>> This creates a new project, and adds 10 data rows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created 10 data rows to the project.
-
-
-        """
-        if not dataset_id and not dataset_name:
-            raise ValueError(
-                "dataset_name or dataset_id must be present and not be an empty string."
-            )
-        if data_row_count <= 0:
-            raise ValueError("data_row_count must be a positive integer.")
-
-        if dataset_id:
-            append_to_existing_dataset = True
-            dataset_name_or_id = dataset_id
-        else:
-            append_to_existing_dataset = False
-            dataset_name_or_id = dataset_name
-
-        kwargs["media_type"] = MediaType.Conversational
-        kwargs["dataset_name_or_id"] = dataset_name_or_id
-        kwargs["append_to_existing_dataset"] = append_to_existing_dataset
-        kwargs["data_row_count"] = data_row_count
-        kwargs["editor_task_type"] = EditorTaskType.ModelChatEvaluation.value
-
-        return self._create_project(**kwargs)
-
-    def create_offline_model_evaluation_project(self, **kwargs) -> Project:
-        """
-        Creates a project for offline model evaluation.
-        Args:
-            **kwargs: Additional parameters to pass; see the create_project method
-        Returns:
-            Project: The created project
-        """
-        kwargs["media_type"] = (
-            MediaType.Conversational
-        )  # Only Conversational is supported
-        kwargs["editor_task_type"] = (
-            EditorTaskType.OfflineModelChatEvaluation.value
-        )  # Special editor task type for offline model evaluation
-
-        # The following arguments are not supported for offline model evaluation
-        kwargs.pop("dataset_name_or_id", None)
-        kwargs.pop("append_to_existing_dataset", None)
-        kwargs.pop("data_row_count", None)
-
-        return self._create_project(**kwargs)
-
-    def create_prompt_response_generation_project(
-        self,
-        dataset_id: Optional[str] = None,
-        dataset_name: Optional[str] = None,
-        data_row_count: int = 100,
-        **kwargs,
-    ) -> Project:
-        """
-        Use this method exclusively to create a prompt and response generation project.
-
-        Args:
-            dataset_name: When creating a new dataset, pass the name
-            dataset_id: When using an existing dataset, pass the id
-            data_row_count: The number of data row assets to use for the project
-            **kwargs: Additional parameters to pass; see the create_project method
-        Returns:
-            Project: The created project
-
-        NOTE: Only a dataset_name or dataset_id should be included
-
-        Examples:
-            >>> client.create_prompt_response_generation_project(name=project_name, dataset_name="new data set", media_type=MediaType.LLMPromptResponseCreation)
-            >>> This creates a new dataset with a default number of rows (100), creates a new prompt and response creation project and assigns a batch of the newly created data rows to the project.
-
-            >>> client.create_prompt_response_generation_project(name=project_name, dataset_name="new data set", data_row_count=10, media_type=MediaType.LLMPromptCreation)
-            >>> This creates a new dataset with 10 data rows, creates a new prompt creation project and assigns a batch of the newly created data rows to the project.
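
The media_type choice drives which kind of project is created: MediaType.LLMPromptCreation yields a prompt-creation project, while MediaType.LLMPromptResponseCreation yields a prompt-and-response project. A hedged sketch (placeholder names; a configured `client` is assumed):

    from labelbox import MediaType

    project = client.create_prompt_response_generation_project(
        name="prompt-gen",
        dataset_name="prompt-gen-rows",
        data_row_count=10,
        media_type=MediaType.LLMPromptResponseCreation,
    )
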
-
-            >>> client.create_prompt_response_generation_project(name=project_name, dataset_id="clr00u8j0j0j0", media_type=MediaType.LLMPromptCreation)
-            >>> This creates a new prompt creation project, and adds 100 data rows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created data rows to the project.
-
-            >>> client.create_prompt_response_generation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10, media_type=MediaType.LLMPromptResponseCreation)
-            >>> This creates a new prompt and response creation project, and adds 10 data rows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created 10 data rows to the project.
-
-        """
-        if not dataset_id and not dataset_name:
-            raise ValueError(
-                "dataset_name or dataset_id must be present and not be an empty string."
-            )
-
-        if dataset_id and dataset_name:
-            raise ValueError(
-                "Only provide a dataset_name or dataset_id, not both."
-            )
-
-        if data_row_count <= 0:
-            raise ValueError("data_row_count must be a positive integer.")
-
-        if dataset_id:
-            append_to_existing_dataset = True
-            dataset_name_or_id = dataset_id
-        else:
-            append_to_existing_dataset = False
-            dataset_name_or_id = dataset_name
-
-        if "media_type" in kwargs and kwargs.get("media_type") not in [
-            MediaType.LLMPromptCreation,
-            MediaType.LLMPromptResponseCreation,
-        ]:
-            raise ValueError(
-                "media_type must be either LLMPromptCreation or LLMPromptResponseCreation"
-            )
-
-        kwargs["dataset_name_or_id"] = dataset_name_or_id
-        kwargs["append_to_existing_dataset"] = append_to_existing_dataset
-        kwargs["data_row_count"] = data_row_count
-
-        kwargs.pop("editor_task_type", None)
-
-        return self._create_project(**kwargs)
-
-    def create_response_creation_project(self, **kwargs) -> Project:
-        """
-        Creates a project for response creation.
-        Args:
-            **kwargs: Additional parameters to pass; see the create_project method
-        Returns:
-            Project: The created project
-        """
-        kwargs["media_type"] = MediaType.Text  # Only Text is supported
-        kwargs["editor_task_type"] = (
-            EditorTaskType.ResponseCreation.value
-        )  # Special editor task type for response creation projects
-
-        # The following arguments are not supported for response creation projects
-        kwargs.pop("dataset_name_or_id", None)
-        kwargs.pop("append_to_existing_dataset", None)
-        kwargs.pop("data_row_count", None)
-
-        return self._create_project(**kwargs)
-
-    def _create_project(self, **kwargs) -> Project:
-        auto_audit_percentage = kwargs.get("auto_audit_percentage")
-        auto_audit_number_of_labels = kwargs.get("auto_audit_number_of_labels")
-        if (
-            auto_audit_percentage is not None
-            or auto_audit_number_of_labels is not None
-        ):
-            raise ValueError(
-                "quality_modes must be set instead of auto_audit_percentage or auto_audit_number_of_labels."
-            )
-
-        name = kwargs.get("name")
-        if name is None or not name.strip():
-            raise ValueError("project name must be a valid string.")
-
-        queue_mode = kwargs.get("queue_mode")
-        if queue_mode is QueueMode.Dataset:
-            raise ValueError(
-                "Dataset queue mode is deprecated. Please prefer Batch queue mode."
-            )
-        elif queue_mode is QueueMode.Batch:
-            logger.warning(
-                "Passing a queue mode of batch is redundant and will soon no longer be supported."
-            )
-
-        media_type = kwargs.get("media_type")
-        if media_type and MediaType.is_supported(media_type):
-            media_type_value = media_type.value
-        elif media_type:
-            raise TypeError(
-                f"{media_type} is not a valid media type. Use"
-                f" any of {MediaType.get_supported_members()}"
-                " from MediaType. 
Example: MediaType.Image." - ) - else: - logger.warning( - "Creating a project without specifying media_type" - " through this method will soon no longer be supported." - ) - media_type_value = None - - quality_modes = kwargs.get("quality_modes") - quality_mode = kwargs.get("quality_mode") - if quality_mode: - logger.warning( - "Passing quality_mode is deprecated and will soon no longer be supported. Use quality_modes instead." - ) - - if quality_modes and quality_mode: - raise ValueError( - "Cannot use both quality_modes and quality_mode at the same time. Use one or the other." - ) - - if not quality_modes and not quality_mode: - logger.info("Defaulting quality modes to Benchmark and Consensus.") - - data = kwargs - data.pop("quality_modes", None) - data.pop("quality_mode", None) - - # check if quality_modes is a set, if not, convert to set - quality_modes_set = quality_modes - if quality_modes and not isinstance(quality_modes, set): - quality_modes_set = set(quality_modes) - if quality_mode: - quality_modes_set = {quality_mode} - - if ( - quality_modes_set is None - or len(quality_modes_set) == 0 - or quality_modes_set - == {QualityMode.Benchmark, QualityMode.Consensus} - ): - data["auto_audit_number_of_labels"] = ( - CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS - ) - data["auto_audit_percentage"] = CONSENSUS_AUTO_AUDIT_PERCENTAGE - data["is_benchmark_enabled"] = True - data["is_consensus_enabled"] = True - elif quality_modes_set == {QualityMode.Benchmark}: - data["auto_audit_number_of_labels"] = ( - BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS - ) - data["auto_audit_percentage"] = BENCHMARK_AUTO_AUDIT_PERCENTAGE - data["is_benchmark_enabled"] = True - elif quality_modes_set == {QualityMode.Consensus}: - data["auto_audit_number_of_labels"] = ( - CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS - ) - data["auto_audit_percentage"] = CONSENSUS_AUTO_AUDIT_PERCENTAGE - data["is_consensus_enabled"] = True - else: - raise ValueError( - f"{quality_modes_set} is not a valid quality modes set. Allowed values are [Benchmark, Consensus]" - ) - - params = {**data} - if media_type_value: - params["media_type"] = media_type_value - - extra_params = { - Field.String("dataset_name_or_id"): params.pop( - "dataset_name_or_id", None - ), - Field.Boolean("append_to_existing_dataset"): params.pop( - "append_to_existing_dataset", None - ), - } - extra_params = {k: v for k, v in extra_params.items() if v is not None} - return self._create(Entity.Project, params, extra_params) - - def get_roles(self) -> List[Role]: - """ - Returns: - Roles: Provides information on available roles within an organization. - Roles are used for user management. 
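
The quality-mode branching above means callers only pass quality_modes; the auto-audit numbers are derived internally. A minimal sketch (hedged; assumes a configured `client` and the QualityMode import path used elsewhere in this repo's tests):

    from labelbox import MediaType
    from labelbox.schema.quality_mode import QualityMode

    # Benchmark-only: routes to the BENCHMARK_AUTO_AUDIT_* settings above.
    project = client.create_project(
        name="bench-project",
        media_type=MediaType.Image,
        quality_modes=[QualityMode.Benchmark],
    )
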
-        """
-        return role.get_roles(self)
-
-    def get_data_row(self, data_row_id):
-        """
-
-        Returns:
-            DataRow: returns a single data row given the data row id
-        """
-
-        return self._get_single(Entity.DataRow, data_row_id)
-
-    def get_data_row_by_global_key(self, global_key: str) -> DataRow:
-        """
-        Returns: DataRow: returns a single data row given the global key
-        """
-        res = self.get_data_row_ids_for_global_keys([global_key])
-        if res["status"] != "SUCCESS":
-            raise lbox.exceptions.ResourceNotFoundError(
-                Entity.DataRow, {global_key: global_key}
-            )
-        data_row_id = res["results"][0]
-
-        return self.get_data_row(data_row_id)
-
-    def get_data_row_metadata_ontology(self) -> DataRowMetadataOntology:
-        """
-
-        Returns:
-            DataRowMetadataOntology: The ontology for Data Row Metadata for an organization
-
-        """
-        if self._data_row_metadata_ontology is None:
-            self._data_row_metadata_ontology = DataRowMetadataOntology(self)
-        return self._data_row_metadata_ontology
-
-    def get_model(self, model_id) -> Model:
-        """Gets a single Model with the given ID.
-
-        >>> model = client.get_model("<model_id>")
-
-        Args:
-            model_id (str): Unique ID of the Model.
-        Returns:
-            The sought Model.
-        Raises:
-            lbox.exceptions.ResourceNotFoundError: If there is no
-                Model with the given ID.
-        """
-        return self._get_single(Entity.Model, model_id)
-
-    def get_models(self, where=None) -> List[Model]:
-        """Fetches all the models the user has access to.
-
-        >>> models = client.get_models(where=(Model.name == "<model_name>"))
-
-        Args:
-            where (Comparison, LogicalOperation or None): The `where` clause
-                for filtering.
-        Returns:
-            An iterable of Models (typically a PaginatedCollection).
-        """
-        return self._get_all(Entity.Model, where, filter_deleted=False)
-
-    def create_model(self, name, ontology_id) -> Model:
-        """Creates a Model object on the server.
-
-        >>> model = client.create_model(<model_name>, <ontology_id>)
-
-        Args:
-            name (string): Name of the model
-            ontology_id (string): ID of the related ontology
-        Returns:
-            A new Model object.
-        Raises:
-            InvalidAttributeError: If the Model type does not contain
-                any of the attribute names given in kwargs.
-        """
-        query_str = """mutation createModelPyApi($name: String!, $ontologyId: ID!){
-            createModel(data: {name : $name, ontologyId : $ontologyId}){
-                %s
-            }
-        }""" % query.results_query_part(Entity.Model)
-
-        result = self.execute(
-            query_str, {"name": name, "ontologyId": ontology_id}
-        )
-        return Entity.Model(self, result["createModel"])
-
-    def get_data_row_ids_for_external_ids(
-        self, external_ids: List[str]
-    ) -> Dict[str, List[str]]:
-        """
-        Returns a list of data row ids for a list of external ids.
-        There is a max of 1500 items returned at a time.
-
-        Args:
-            external_ids: List of external ids to fetch data row ids for
-        Returns:
-            A dict of external ids as keys and values as a list of data row ids that correspond to that external id.
-        """
-        query_str = """query externalIdsToDataRowIdsPyApi($externalId_in: [String!]!){
-            externalIdsToDataRowIds(externalId_in: $externalId_in) { dataRowId externalId }
-        }
-        """
-        max_ids_per_request = 100
-        result = defaultdict(list)
-        for i in range(0, len(external_ids), max_ids_per_request):
-            for row in self.execute(
-                query_str,
-                {"externalId_in": external_ids[i : i + max_ids_per_request]},
-            )["externalIdsToDataRowIds"]:
-                result[row["externalId"]].append(row["dataRowId"])
-        return result
-
-    def get_ontology(self, ontology_id) -> Ontology:
-        """
-        Fetches an Ontology by id.
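
get_data_row_ids_for_external_ids batches its lookups 100 external IDs per request and aggregates the results into one dict. A hedged usage sketch (the IDs are placeholders):

    mapping = client.get_data_row_ids_for_external_ids(["ext-1", "ext-2"])
    for external_id, data_row_ids in mapping.items():
        # One external id may map to several data rows.
        print(external_id, data_row_ids)
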
- - Args: - ontology_id (str): The id of the ontology to query for - Returns: - Ontology - """ - return self._get_single(Entity.Ontology, ontology_id) - - def get_ontologies(self, name_contains) -> PaginatedCollection: - """ - Fetches all ontologies with names that match the name_contains string. - - Args: - name_contains (str): the string to search ontology names by - Returns: - PaginatedCollection of Ontologies with names that match `name_contains` - """ - query_str = """query getOntologiesPyApi($search: String, $filter: OntologyFilter, $from : String, $first: PageSize){ - ontologies(where: {filter: $filter, search: $search}, after: $from, first: $first){ - nodes {%s} - nextCursor - } - } - """ % query.results_query_part(Entity.Ontology) - params = {"search": name_contains, "filter": {"status": "ALL"}} - return PaginatedCollection( - self, - query_str, - params, - ["ontologies", "nodes"], - Entity.Ontology, - ["ontologies", "nextCursor"], - ) - - def get_feature_schema(self, feature_schema_id): - """ - Fetches a feature schema. Only supports top level feature schemas. - - Args: - feature_schema_id (str): The id of the feature schema to query for - Returns: - FeatureSchema - """ - - query_str = """query rootSchemaNodePyApi($rootSchemaNodeWhere: RootSchemaNodeWhere!){ - rootSchemaNode(where: $rootSchemaNodeWhere){%s} - }""" % query.results_query_part(Entity.FeatureSchema) - - res = self.execute( - query_str, - {"rootSchemaNodeWhere": {"featureSchemaId": feature_schema_id}}, - )["rootSchemaNode"] - res["id"] = res["normalized"]["featureSchemaId"] - return Entity.FeatureSchema(self, res) - - def get_feature_schemas(self, name_contains) -> PaginatedCollection: - """ - Fetches top level feature schemas with names that match the `name_contains` string - - Args: - name_contains (str): search filter for a name of a root feature schema - If present, results in a case insensitive 'like' search for feature schemas - If None, returns all top level feature schemas - Returns: - PaginatedCollection of FeatureSchemas with names that match `name_contains` - """ - query_str = """query rootSchemaNodesPyApi($search: String, $filter: RootSchemaNodeFilter, $from : String, $first: PageSize){ - rootSchemaNodes(where: {filter: $filter, search: $search}, after: $from, first: $first){ - nodes {%s} - nextCursor - } - } - """ % query.results_query_part(Entity.FeatureSchema) - params = {"search": name_contains, "filter": {"status": "ALL"}} - - def rootSchemaPayloadToFeatureSchema(client, payload): - # Technically we are querying for a Schema Node. - # But the features are the same so we just grab the feature schema id - payload["id"] = payload["normalized"]["featureSchemaId"] - return Entity.FeatureSchema(client, payload) - - return PaginatedCollection( - self, - query_str, - params, - ["rootSchemaNodes", "nodes"], - rootSchemaPayloadToFeatureSchema, - ["rootSchemaNodes", "nextCursor"], - ) - - def create_ontology_from_feature_schemas( - self, - name, - feature_schema_ids, - media_type: MediaType = None, - ontology_kind: OntologyKind = None, - ) -> Ontology: - """ - Creates an ontology from a list of feature schema ids - - Args: - name (str): Name of the ontology - feature_schema_ids (List[str]): List of feature schema ids corresponding to - top level tools and classifications to include in the ontology - media_type (MediaType or None): Media type of a new ontology. - ontology_kind (OntologyKind or None): set to OntologyKind.ModelEvaluation if the ontology is for chat evaluation, - leave as None otherwise. 
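
A sketch of the reuse flow this method enables (hedged; the feature schema IDs are placeholders for existing top-level tools and classifications, and a configured `client` is assumed):

    from labelbox import MediaType

    ontology = client.create_ontology_from_feature_schemas(
        name="reused-ontology",
        feature_schema_ids=[
            "<tool_feature_schema_id>",
            "<classification_feature_schema_id>",
        ],
        media_type=MediaType.Image,
    )
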
- Returns: - The created Ontology - - NOTE for chat evaluation, we currently force media_type to Conversational and for response creation, we force media_type to Text. - """ - tools, classifications = [], [] - for feature_schema_id in feature_schema_ids: - feature_schema = self.get_feature_schema(feature_schema_id) - tool = ["tool"] - if "tool" in feature_schema.normalized: - tool = feature_schema.normalized["tool"] - try: - Tool.Type(tool) - tools.append(feature_schema.normalized) - except ValueError: - raise ValueError( - f"Tool `{tool}` not in list of supported tools." - ) - elif "type" in feature_schema.normalized: - classification = feature_schema.normalized["type"] - if ( - classification - in Classification.Type._value2member_map_.keys() - ): - Classification.Type(classification) - classifications.append(feature_schema.normalized) - elif ( - classification - in PromptResponseClassification.Type._value2member_map_.keys() - ): - PromptResponseClassification.Type(classification) - classifications.append(feature_schema.normalized) - else: - raise ValueError( - f"Classification `{classification}` not in list of supported classifications." - ) - else: - raise ValueError( - "Neither `tool` or `classification` found in the normalized feature schema" - ) - normalized = {"tools": tools, "classifications": classifications} - - # validation for ontology_kind and media_type is done within self.create_ontology - return self.create_ontology( - name=name, - normalized=normalized, - media_type=media_type, - ontology_kind=ontology_kind, - ) - - def delete_unused_feature_schema(self, feature_schema_id: str) -> None: - """ - Deletes a feature schema if it is not used by any ontologies or annotations - Args: - feature_schema_id (str): The id of the feature schema to delete - Example: - >>> client.delete_unused_feature_schema("cleabc1my012ioqvu5anyaabc") - """ - - endpoint = ( - self.rest_endpoint - + "/feature-schemas/" - + urllib.parse.quote(feature_schema_id) - ) - response = self.connection.delete(endpoint) - - if response.status_code != requests.codes.no_content: - raise lbox.exceptions.LabelboxError( - "Failed to delete the feature schema, message: " - + str(response.json()["message"]) - ) - - def delete_unused_ontology(self, ontology_id: str) -> None: - """ - Deletes an ontology if it is not used by any annotations - Args: - ontology_id (str): The id of the ontology to delete - Example: - >>> client.delete_unused_ontology("cleabc1my012ioqvu5anyaabc") - """ - endpoint = ( - self.rest_endpoint - + "/ontologies/" - + urllib.parse.quote(ontology_id) - ) - response = self.connection.delete(endpoint) - - if response.status_code != requests.codes.no_content: - raise lbox.exceptions.LabelboxError( - "Failed to delete the ontology, message: " - + str(response.json()["message"]) - ) - - def update_feature_schema_title( - self, feature_schema_id: str, title: str - ) -> FeatureSchema: - """ - Updates a title of a feature schema - Args: - feature_schema_id (str): The id of the feature schema to update - title (str): The new title of the feature schema - Returns: - The updated feature schema - Example: - >>> client.update_feature_schema_title("cleabc1my012ioqvu5anyaabc", "New Title") - """ - - endpoint = ( - self.rest_endpoint - + "/feature-schemas/" - + urllib.parse.quote(feature_schema_id) - + "/definition" - ) - response = self.connection.patch(endpoint, json={"title": title}) - - if response.status_code == requests.codes.ok: - return self.get_feature_schema(feature_schema_id) - else: - raise 
lbox.exceptions.LabelboxError( - "Failed to update the feature schema, message: " - + str(response.json()["message"]) - ) - - def upsert_feature_schema(self, feature_schema: Dict) -> FeatureSchema: - """ - Upserts a feature schema - Args: - feature_schema: Dict representing the feature schema to upsert - Returns: - The upserted feature schema - Example: - Insert a new feature schema - >>> tool = Tool(name="tool", tool=Tool.Type.BOUNDING_BOX, color="#FF0000") - >>> client.upsert_feature_schema(tool.asdict()) - Update an existing feature schema - >>> tool = Tool(feature_schema_id="cleabc1my012ioqvu5anyaabc", name="tool", tool=Tool.Type.BOUNDING_BOX, color="#FF0000") - >>> client.upsert_feature_schema(tool.asdict()) - """ - - feature_schema_id = ( - feature_schema.get("featureSchemaId") or "new_feature_schema_id" - ) - endpoint = ( - self.rest_endpoint - + "/feature-schemas/" - + urllib.parse.quote(feature_schema_id) - ) - response = self.connection.put( - endpoint, json={"normalized": json.dumps(feature_schema)} - ) - - if response.status_code == requests.codes.ok: - return self.get_feature_schema(response.json()["schemaId"]) - else: - raise lbox.exceptions.LabelboxError( - "Failed to upsert the feature schema, message: " - + str(response.json()["message"]) - ) - - def insert_feature_schema_into_ontology( - self, feature_schema_id: str, ontology_id: str, position: int - ) -> None: - """ - Inserts a feature schema into an ontology. If the feature schema is already in the ontology, - it will be moved to the new position. - Args: - feature_schema_id (str): The feature schema id to upsert - ontology_id (str): The id of the ontology to insert the feature schema into - position (int): The position number of the feature schema in the ontology - Example: - >>> client.insert_feature_schema_into_ontology("cleabc1my012ioqvu5anyaabc", "clefdvwl7abcgefgu3lyvcde", 2) - """ - - endpoint = ( - self.rest_endpoint - + "/ontologies/" - + urllib.parse.quote(ontology_id) - + "/feature-schemas/" - + urllib.parse.quote(feature_schema_id) - ) - response = self.connection.post(endpoint, json={"position": position}) - if response.status_code != requests.codes.created: - raise lbox.exceptions.LabelboxError( - "Failed to insert the feature schema into the ontology, message: " - + str(response.json()["message"]) - ) - - def get_unused_ontologies(self, after: str = None) -> List[str]: - """ - Returns a list of unused ontology ids - Args: - after (str): The cursor to use for pagination - Returns: - A list of unused ontology ids - Example: - To get the first page of unused ontology ids (100 at a time) - >>> client.get_unused_ontologies() - To get the next page of unused ontology ids - >>> client.get_unused_ontologies("cleabc1my012ioqvu5anyaabc") - """ - - endpoint = self.rest_endpoint + "/ontologies/unused" - response = self.connection.get(endpoint, json={"after": after}) - - if response.status_code == requests.codes.ok: - return response.json() - else: - raise lbox.exceptions.LabelboxError( - "Failed to get unused ontologies, message: " - + str(response.json()["message"]) - ) - - def get_unused_feature_schemas(self, after: str = None) -> List[str]: - """ - Returns a list of unused feature schema ids - Args: - after (str): The cursor to use for pagination - Returns: - A list of unused feature schema ids - Example: - To get the first page of unused feature schema ids (100 at a time) - >>> client.get_unused_feature_schemas() - To get the next page of unused feature schema ids - >>> 
client.get_unused_feature_schemas("cleabc1my012ioqvu5anyaabc") - """ - - endpoint = self.rest_endpoint + "/feature-schemas/unused" - response = self.connection.get(endpoint, json={"after": after}) - - if response.status_code == requests.codes.ok: - return response.json() - else: - raise lbox.exceptions.LabelboxError( - "Failed to get unused feature schemas, message: " - + str(response.json()["message"]) - ) - - def create_ontology( - self, - name, - normalized, - media_type: MediaType = None, - ontology_kind: OntologyKind = None, - ) -> Ontology: - """ - Creates an ontology from normalized data - >>> normalized = {"tools" : [{'tool': 'polygon', 'name': 'cat', 'color': 'black'}], "classifications" : []} - >>> ontology = client.create_ontology("ontology-name", normalized) - - Or use the ontology builder. It is especially useful for complex ontologies - >>> normalized = OntologyBuilder(tools=[Tool(tool=Tool.Type.BBOX, name="cat", color = 'black')]).asdict() - >>> ontology = client.create_ontology("ontology-name", normalized) - - To reuse existing feature schemas, use `create_ontology_from_feature_schemas()` - More details can be found here: - https://github.com/Labelbox/labelbox-python/blob/develop/examples/basics/ontologies.ipynb - - Args: - name (str): Name of the ontology - normalized (dict): A normalized ontology payload. See above for details. - media_type (MediaType or None): Media type of a new ontology - ontology_kind (OntologyKind or None): set to OntologyKind.ModelEvaluation if the ontology is for chat evaluation or - OntologyKind.ResponseCreation if ontology is for response creation, leave as None otherwise. - - Returns: - The created Ontology - - NOTE for chat evaluation, we currently force media_type to Conversational and for response creation, we force media_type to Text. - """ - - media_type_value = None - if media_type: - if MediaType.is_supported(media_type): - media_type_value = media_type.value - else: - raise get_media_type_validation_error(media_type) - - if ontology_kind and OntologyKind.is_supported(ontology_kind): - media_type = OntologyKind.evaluate_ontology_kind_with_media_type( - ontology_kind, media_type - ) - editor_task_type_value = EditorTaskTypeMapper.to_editor_task_type( - ontology_kind, media_type - ).value - elif ontology_kind: - raise OntologyKind.get_ontology_kind_validation_error(ontology_kind) - else: - editor_task_type_value = None - - query_str = """mutation upsertRootSchemaNodePyApi($data: UpsertOntologyInput!){ - upsertOntology(data: $data){ %s } - } """ % query.results_query_part(Entity.Ontology) - params = { - "data": { - "name": name, - "normalized": json.dumps(normalized), - "mediaType": media_type_value, - } - } - if editor_task_type_value: - params["data"]["editorTaskType"] = editor_task_type_value - - res = self.execute(query_str, params) - return Entity.Ontology(self, res["upsertOntology"]) - - def create_feature_schema(self, normalized): - """ - Creates a feature schema from normalized data. - >>> normalized = {'tool': 'polygon', 'name': 'cat', 'color': 'black'} - >>> feature_schema = client.create_feature_schema(normalized) - - Or use the Tool or Classification objects. It is especially useful for complex tools. 
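
Expanding that into a runnable sketch (hedged; assumes a configured `client`, and the ontology ID is a placeholder):

    from labelbox import Tool

    bbox = Tool(tool=Tool.Type.BBOX, name="cat", color="black")
    feature_schema = client.create_feature_schema(bbox.asdict())
    # The new schema can then be reused, e.g. inserted into an ontology:
    client.insert_feature_schema_into_ontology(
        feature_schema.uid, "<ontology_id>", position=0
    )
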
- >>> normalized = Tool(tool=Tool.Type.BBOX, name="cat", color = 'black').asdict() - >>> feature_schema = client.create_feature_schema(normalized) - - Subclasses are also supported - >>> normalized = Tool( - tool=Tool.Type.SEGMENTATION, - name="cat", - classifications=[ - Classification( - class_type=Classification.Type.TEXT, - name="name" - ) - ] - ) - >>> feature_schema = client.create_feature_schema(normalized) - - More details can be found here: - https://github.com/Labelbox/labelbox-python/blob/develop/examples/basics/ontologies.ipynb - - Args: - normalized (dict): A normalized tool or classification payload. See above for details - Returns: - The created FeatureSchema. - """ - query_str = """mutation upsertRootSchemaNodePyApi($data: UpsertRootSchemaNodeInput!){ - upsertRootSchemaNode(data: $data){ %s } - } """ % query.results_query_part(Entity.FeatureSchema) - normalized = {k: v for k, v in normalized.items() if v} - params = {"data": {"normalized": json.dumps(normalized)}} - res = self.execute(query_str, params)["upsertRootSchemaNode"] - # Technically we are querying for a Schema Node. - # But the features are the same so we just grab the feature schema id - res["id"] = res["normalized"]["featureSchemaId"] - return Entity.FeatureSchema(self, res) - - def get_model_run(self, model_run_id: str) -> ModelRun: - """Gets a single ModelRun with the given ID. - - >>> model_run = client.get_model_run("") - - Args: - model_run_id (str): Unique ID of the ModelRun. - Returns: - A ModelRun object. - """ - return self._get_single(Entity.ModelRun, model_run_id) - - def assign_global_keys_to_data_rows( - self, - global_key_to_data_row_inputs: List[Dict[str, str]], - timeout_seconds=60, - ) -> Dict[str, Union[str, List[Any]]]: - """ - Assigns global keys to data rows. - - Args: - A list of dicts containing data_row_id and global_key. - Returns: - Dictionary containing 'status', 'results' and 'errors'. - - 'Status' contains the outcome of this job. It can be one of - 'Success', 'Partial Success', or 'Failure'. - - 'Results' contains the successful global_key assignments, including - global_keys that have been sanitized to Labelbox standards. - - 'Errors' contains global_key assignments that failed, along with - the reasons for failure. 
- Examples: - >>> global_key_data_row_inputs = [ - {"data_row_id": "cl7asgri20yvo075b4vtfedjb", "global_key": "key1"}, - {"data_row_id": "cl7asgri10yvg075b4pz176ht", "global_key": "key2"}, - ] - >>> job_result = client.assign_global_keys_to_data_rows(global_key_data_row_inputs) - >>> print(job_result['status']) - Partial Success - >>> print(job_result['results']) - [{'data_row_id': 'cl7tv9wry00hlka6gai588ozv', 'global_key': 'gk', 'sanitized': False}] - >>> print(job_result['errors']) - [{'data_row_id': 'cl7tpjzw30031ka6g4evqdfoy', 'global_key': 'gk"', 'error': 'Invalid global key'}] - """ - - def _format_successful_rows( - rows: Dict[str, str], sanitized: bool - ) -> List[Dict[str, str]]: - return [ - { - "data_row_id": r["dataRowId"], - "global_key": r["globalKey"], - "sanitized": sanitized, - } - for r in rows - ] - - def _format_failed_rows( - rows: Dict[str, str], error_msg: str - ) -> List[Dict[str, str]]: - return [ - { - "data_row_id": r["dataRowId"], - "global_key": r["globalKey"], - "error": error_msg, - } - for r in rows - ] - - # Validate input dict - validation_errors = [] - for input in global_key_to_data_row_inputs: - if "data_row_id" not in input or "global_key" not in input: - validation_errors.append(input) - if len(validation_errors) > 0: - raise ValueError( - f"Must provide a list of dicts containing both `data_row_id` and `global_key`. The following dict(s) are invalid: {validation_errors}." - ) - - # Start assign global keys to data rows job - query_str = """mutation assignGlobalKeysToDataRowsPyApi($globalKeyDataRowLinks: [AssignGlobalKeyToDataRowInput!]!) { - assignGlobalKeysToDataRows(data: {assignInputs: $globalKeyDataRowLinks}) { - jobId - } - } - """ - params = { - "globalKeyDataRowLinks": [ - {utils.camel_case(key): value for key, value in input.items()} - for input in global_key_to_data_row_inputs - ] - } - assign_global_keys_to_data_rows_job = self.execute(query_str, params) - - # Query string for retrieving job status and result, if job is done - result_query_str = """query assignGlobalKeysToDataRowsResultPyApi($jobId: ID!) { - assignGlobalKeysToDataRowsResult(jobId: {id: $jobId}) { - jobStatus - data { - sanitizedAssignments { - dataRowId - globalKey - } - invalidGlobalKeyAssignments { - dataRowId - globalKey - } - unmodifiedAssignments { - dataRowId - globalKey - } - accessDeniedAssignments { - dataRowId - globalKey - } - }}} - """ - result_params = { - "jobId": assign_global_keys_to_data_rows_job[ - "assignGlobalKeysToDataRows" - ]["jobId"] - } - - # Poll job status until finished, then retrieve results - sleep_time = 2 - start_time = time.time() - while True: - res = self.execute(result_query_str, result_params) - if ( - res["assignGlobalKeysToDataRowsResult"]["jobStatus"] - == "COMPLETE" - ): - results, errors = [], [] - res = res["assignGlobalKeysToDataRowsResult"]["data"] - # Successful assignments - results.extend( - _format_successful_rows( - rows=res["sanitizedAssignments"], sanitized=True - ) - ) - results.extend( - _format_successful_rows( - rows=res["unmodifiedAssignments"], sanitized=False - ) - ) - # Failed assignments - errors.extend( - _format_failed_rows( - rows=res["invalidGlobalKeyAssignments"], - error_msg="Invalid assignment. 
Either DataRow does not exist, or globalKey is invalid", - ) - ) - errors.extend( - _format_failed_rows( - rows=res["accessDeniedAssignments"], - error_msg="Access denied to Data Row", - ) - ) - - if not errors: - status = CollectionJobStatus.SUCCESS.value - elif errors and results: - status = CollectionJobStatus.PARTIAL_SUCCESS.value - else: - status = CollectionJobStatus.FAILURE.value - - if errors: - logger.warning( - "There are errors present. Please look at 'errors' in the returned dict for more details" - ) - - return { - "status": status, - "results": results, - "errors": errors, - } - elif ( - res["assignGlobalKeysToDataRowsResult"]["jobStatus"] == "FAILED" - ): - raise lbox.exceptions.LabelboxError( - "Job assign_global_keys_to_data_rows failed." - ) - current_time = time.time() - if current_time - start_time > timeout_seconds: - raise lbox.exceptions.TimeoutError( - "Timed out waiting for assign_global_keys_to_data_rows job to complete." - ) - time.sleep(sleep_time) - - def get_data_row_ids_for_global_keys( - self, global_keys: List[str], timeout_seconds=60 - ) -> Dict[str, Union[str, List[Any]]]: - """ - Gets data row ids for a list of global keys. - - Deprecation Notice: This function will soon no longer return 'Deleted Data Rows' - as part of the 'results'. Global keys for deleted data rows will soon be placed - under 'Data Row not found' portion. - - Args: - A list of global keys - Returns: - Dictionary containing 'status', 'results' and 'errors'. - - 'Status' contains the outcome of this job. It can be one of - 'Success', 'Partial Success', or 'Failure'. - - 'Results' contains a list of the fetched corresponding data row ids in the input order. - For data rows that cannot be fetched due to an error, or data rows that do not exist, - empty string is returned at the position of the respective global_key. - More error information can be found in the 'Errors' section. - - 'Errors' contains a list of global_keys that could not be fetched, along - with the failure reason - Examples: - >>> job_result = client.get_data_row_ids_for_global_keys(["key1","key2"]) - >>> print(job_result['status']) - Partial Success - >>> print(job_result['results']) - ['cl7tv9wry00hlka6gai588ozv', 'cl7tv9wxg00hpka6gf8sh81bj'] - >>> print(job_result['errors']) - [{'global_key': 'asdf', 'error': 'Data Row not found'}] - """ - - def _format_failed_rows( - rows: List[str], error_msg: str - ) -> List[Dict[str, str]]: - return [{"global_key": r, "error": error_msg} for r in rows] - - # Start get data rows for global keys job - query_str = """query getDataRowsForGlobalKeysPyApi($globalKeys: [ID!]!) { - dataRowsForGlobalKeys(where: {ids: $globalKeys}) { jobId}} - """ - params = {"globalKeys": global_keys} - data_rows_for_global_keys_job = self.execute(query_str, params) - - # Query string for retrieving job status and result, if job is done - result_query_str = """query getDataRowsForGlobalKeysResultPyApi($jobId: ID!) 
{
-            dataRowsForGlobalKeysResult(jobId: {id: $jobId}) { data {
-                fetchedDataRows { id }
-                notFoundGlobalKeys
-                accessDeniedGlobalKeys
-                } jobStatus}}
-        """
-        result_params = {
-            "jobId": data_rows_for_global_keys_job["dataRowsForGlobalKeys"][
-                "jobId"
-            ]
-        }
-
-        # Poll job status until finished, then retrieve results
-        sleep_time = 2
-        start_time = time.time()
-        while True:
-            res = self.execute(result_query_str, result_params)
-            if res["dataRowsForGlobalKeysResult"]["jobStatus"] == "COMPLETE":
-                data = res["dataRowsForGlobalKeysResult"]["data"]
-                results, errors = [], []
-                results.extend([row["id"] for row in data["fetchedDataRows"]])
-                errors.extend(
-                    _format_failed_rows(
-                        data["notFoundGlobalKeys"], "Data Row not found"
-                    )
-                )
-                errors.extend(
-                    _format_failed_rows(
-                        data["accessDeniedGlobalKeys"],
-                        "Access denied to Data Row",
-                    )
-                )
-
-                # Invalid results may contain empty string, so we must filter
-                # them prior to checking for PARTIAL_SUCCESS
-                filtered_results = list(filter(lambda r: r != "", results))
-                if not errors:
-                    status = CollectionJobStatus.SUCCESS.value
-                elif errors and len(filtered_results) > 0:
-                    status = CollectionJobStatus.PARTIAL_SUCCESS.value
-                else:
-                    status = CollectionJobStatus.FAILURE.value
-
-                if errors:
-                    logger.warning(
-                        "There are errors present. Please look at 'errors' in the returned dict for more details"
-                    )
-
-                return {"status": status, "results": results, "errors": errors}
-            elif res["dataRowsForGlobalKeysResult"]["jobStatus"] == "FAILED":
-                raise lbox.exceptions.LabelboxError(
-                    "Job dataRowsForGlobalKeys failed."
-                )
-            current_time = time.time()
-            if current_time - start_time > timeout_seconds:
-                raise lbox.exceptions.TimeoutError(
-                    "Timed out waiting for get_data_rows_for_global_keys job to complete."
-                )
-            time.sleep(sleep_time)
-
-    def clear_global_keys(
-        self, global_keys: List[str], timeout_seconds=60
-    ) -> Dict[str, Union[str, List[Any]]]:
-        """
-        Clears global keys for the data rows that correspond to the global keys provided.
-
-        Args:
-            A list of global keys
-        Returns:
-            Dictionary containing 'status', 'results' and 'errors'.
-
-            'Status' contains the outcome of this job. It can be one of
-            'Success', 'Partial Success', or 'Failure'.
-
-            'Results' contains a list of global keys that were successfully cleared.
-
-            'Errors' contains a list of global_keys that correspond to the data rows that could not be
-            modified, accessed by the user, or not found.
-        Examples:
-            >>> job_result = client.clear_global_keys(["key1","key2","notfoundkey"])
-            >>> print(job_result['status'])
-            Partial Success
-            >>> print(job_result['results'])
-            ['key1', 'key2']
-            >>> print(job_result['errors'])
-            [{'global_key': 'notfoundkey', 'error': 'Failed to find data row matching provided global key'}]
-        """
-
-        def _format_failed_rows(
-            rows: List[str], error_msg: str
-        ) -> List[Dict[str, str]]:
-            return [{"global_key": r, "error": error_msg} for r in rows]
-
-        # Start clear global keys job
-        query_str = """mutation clearGlobalKeysPyApi($globalKeys: [ID!]!) {
-            clearGlobalKeys(where: {ids: $globalKeys}) { jobId}}
-        """
-        params = {"globalKeys": global_keys}
-        clear_global_keys_job = self.execute(query_str, params)
-
-        # Query string for retrieving job status and result, if job is done
-        result_query_str = """query clearGlobalKeysResultPyApi($jobId: ID!) 
{ - clearGlobalKeysResult(jobId: {id: $jobId}) { data { - clearedGlobalKeys - failedToClearGlobalKeys - notFoundGlobalKeys - accessDeniedGlobalKeys - } jobStatus}} - """ - result_params = { - "jobId": clear_global_keys_job["clearGlobalKeys"]["jobId"] - } - # Poll job status until finished, then retrieve results - sleep_time = 2 - start_time = time.time() - while True: - res = self.execute(result_query_str, result_params) - if res["clearGlobalKeysResult"]["jobStatus"] == "COMPLETE": - data = res["clearGlobalKeysResult"]["data"] - results, errors = [], [] - results.extend(data["clearedGlobalKeys"]) - errors.extend( - _format_failed_rows( - data["failedToClearGlobalKeys"], - "Clearing global key failed", - ) - ) - errors.extend( - _format_failed_rows( - data["notFoundGlobalKeys"], - "Failed to find data row matching provided global key", - ) - ) - errors.extend( - _format_failed_rows( - data["accessDeniedGlobalKeys"], - "Denied access to modify data row matching provided global key", - ) - ) - - if not errors: - status = CollectionJobStatus.SUCCESS.value - elif errors and len(results) > 0: - status = CollectionJobStatus.PARTIAL_SUCCESS.value - else: - status = CollectionJobStatus.FAILURE.value - - if errors: - logger.warning( - "There are errors present. Please look at 'errors' in the returned dict for more details" - ) - - return {"status": status, "results": results, "errors": errors} - elif res["clearGlobalKeysResult"]["jobStatus"] == "FAILED": - raise lbox.exceptions.LabelboxError( - "Job clearGlobalKeys failed." - ) - current_time = time.time() - if current_time - start_time > timeout_seconds: - raise lbox.exceptions.TimeoutError( - "Timed out waiting for clear_global_keys job to complete." - ) - time.sleep(sleep_time) - - def get_catalog(self) -> Catalog: - return Catalog(client=self) - - def get_catalog_slice(self, slice_id) -> CatalogSlice: - """ - Fetches a Catalog Slice by ID. - - Args: - slice_id (str): The ID of the Slice - Returns: - CatalogSlice - """ - query_str = """query getSavedQueryPyApi($id: ID!) { - getSavedQuery(id: $id) { - id - name - description - filter - createdAt - updatedAt - } - } - """ - res = self.execute(query_str, {"id": slice_id}) - return Entity.CatalogSlice(self, res["getSavedQuery"]) - - def is_feature_schema_archived( - self, ontology_id: str, feature_schema_id: str - ) -> bool: - """ - Returns true if a feature schema is archived in the specified ontology, returns false otherwise. - - Args: - feature_schema_id (str): The ID of the feature schema - ontology_id (str): The ID of the ontology - Returns: - bool - """ - - ontology_endpoint = ( - self.rest_endpoint - + "/ontologies/" - + urllib.parse.quote(ontology_id) - ) - response = self.connection.get(ontology_endpoint) - - if response.status_code == requests.codes.ok: - feature_schema_nodes = response.json()["featureSchemaNodes"] - tools = feature_schema_nodes["tools"] - classifications = feature_schema_nodes["classifications"] - relationships = feature_schema_nodes["relationships"] - feature_schema_node_list = tools + classifications + relationships - filtered_feature_schema_nodes = [ - feature_schema_node - for feature_schema_node in feature_schema_node_list - if feature_schema_node["featureSchemaId"] == feature_schema_id - ] - if filtered_feature_schema_nodes: - return bool(filtered_feature_schema_nodes[0]["archived"]) - else: - raise lbox.exceptions.LabelboxError( - "The specified feature schema was not in the ontology." 
-                )
-
-        elif response.status_code == 404:
-            raise lbox.exceptions.ResourceNotFoundError(Ontology, ontology_id)
-        else:
-            raise lbox.exceptions.LabelboxError(
-                "Failed to get the feature schema archived status."
-            )
-
-    def get_model_slice(self, slice_id) -> ModelSlice:
-        """
-        Fetches a Model Slice by ID.
-
-        Args:
-            slice_id (str): The ID of the Slice
-        Returns:
-            ModelSlice
-        """
-        query_str = """
-            query getSavedQueryPyApi($id: ID!) {
-                getSavedQuery(id: $id) {
-                    id
-                    name
-                    description
-                    filter
-                    createdAt
-                    updatedAt
-                }
-            }
-        """
-        res = self.execute(query_str, {"id": slice_id})
-        if res is None or res["getSavedQuery"] is None:
-            raise lbox.exceptions.ResourceNotFoundError(ModelSlice, slice_id)
-
-        return Entity.ModelSlice(self, res["getSavedQuery"])
-
-    def delete_feature_schema_from_ontology(
-        self, ontology_id: str, feature_schema_id: str
-    ) -> DeleteFeatureFromOntologyResult:
-        """
-        Deletes or archives a feature schema from an ontology.
-        If the feature schema is a root level node with associated labels, it will be archived.
-        If the feature schema is a nested node in the ontology and does not have associated labels, it will be deleted.
-        If the feature schema is a nested node in the ontology and has associated labels, it will not be deleted.
-
-        Args:
-            ontology_id (str): The ID of the ontology.
-            feature_schema_id (str): The ID of the feature schema.
-
-        Returns:
-            DeleteFeatureFromOntologyResult: The result of the feature schema removal.
-
-        Example:
-            >>> client.delete_feature_schema_from_ontology(<ontology_id>, <feature_schema_id>)
-        """
-        ontology_endpoint = (
-            self.rest_endpoint
-            + "/ontologies/"
-            + urllib.parse.quote(ontology_id)
-            + "/feature-schemas/"
-            + urllib.parse.quote(feature_schema_id)
-        )
-        response = self.connection.delete(ontology_endpoint)
-
-        if response.status_code == requests.codes.ok:
-            response_json = response.json()
-            if response_json["archived"] is True:
-                logger.info(
-                    "Feature schema was archived from the ontology because it had associated labels."
-                )
-            elif response_json["deleted"] is True:
-                logger.info(
-                    "Feature schema was successfully removed from the ontology"
-                )
-            result = DeleteFeatureFromOntologyResult()
-            result.archived = bool(response_json["archived"])
-            result.deleted = bool(response_json["deleted"])
-            return result
-        else:
-            raise lbox.exceptions.LabelboxError(
-                "Failed to remove feature schema from ontology, message: "
-                + str(response.json()["message"])
-            )
-
-    def unarchive_feature_schema_node(
-        self, ontology_id: str, root_feature_schema_id: str
-    ) -> None:
-        """
-        Unarchives a feature schema node in an ontology.
-        Only root level feature schema nodes can be unarchived.
-        Args:
-            ontology_id (str): The ID of the ontology
-            root_feature_schema_id (str): The ID of the root level feature schema
-        Returns:
-            None
-        """
-        ontology_endpoint = (
-            self.rest_endpoint
-            + "/ontologies/"
-            + urllib.parse.quote(ontology_id)
-            + "/feature-schemas/"
-            + urllib.parse.quote(root_feature_schema_id)
-            + "/unarchive"
-        )
-        response = self.connection.patch(ontology_endpoint)
-        if response.status_code == requests.codes.ok:
-            if not bool(response.json()["unarchived"]):
-                raise lbox.exceptions.LabelboxError(
-                    "Failed to unarchive the feature schema."
-                )
-        else:
-            raise lbox.exceptions.LabelboxError(
-                "Failed to unarchive the feature schema node, message: ",
-                response.text,
-            )
-
-    def get_batch(self, project_id: str, batch_id: str) -> Entity.Batch:
-        # obtain batch entity to return
-        get_batch_str = """query %s($projectId: ID!, $batchId: ID!)
{ - project(where: {id: $projectId}) { - batches(where: {id: $batchId}) { - nodes { - %s - } - } - } - } - """ % ( - "getProjectBatchPyApi", - query.results_query_part(Entity.Batch), - ) - - batch = self.execute( - get_batch_str, - {"projectId": project_id, "batchId": batch_id}, - timeout=180.0, - experimental=True, - )["project"]["batches"]["nodes"][0] - - return Entity.Batch(self, project_id, batch) - - def send_to_annotate_from_catalog( - self, - destination_project_id: str, - task_queue_id: Optional[str], - batch_name: str, - data_rows: Union[DataRowIds, GlobalKeys], - params: Dict[str, Any], - ): - """ - Sends data rows from catalog to a specified project for annotation. - - Example usage: - >>> task = client.send_to_annotate_from_catalog( - >>> destination_project_id=DESTINATION_PROJECT_ID, - >>> task_queue_id=TASK_QUEUE_ID, - >>> batch_name="batch_name", - >>> data_rows=UniqueIds([DATA_ROW_ID]), - >>> params={ - >>> "source_project_id": - >>> SOURCE_PROJECT_ID, - >>> "override_existing_annotations_rule": - >>> ConflictResolutionStrategy.OverrideWithAnnotations - >>> }) - >>> task.wait_till_done() - - Args: - destination_project_id: The ID of the project to send the data rows to. - task_queue_id: The ID of the task queue to send the data rows to. If not specified, the data rows will be - sent to the Done workflow state. - batch_name: The name of the batch to create. If more than one batch is created, additional batches will be - named with a monotonically increasing numerical suffix, starting at "_1". - data_rows: The data rows to send to the project. - params: Additional parameters to configure the job. See SendToAnnotateFromCatalogParams for more details. - - Returns: The created task for this operation. - - """ - - validated_params = SendToAnnotateFromCatalogParams(**params) - - mutation_str = """mutation SendToAnnotateFromCatalogPyApi($input: SendToAnnotateFromCatalogInput!) { - sendToAnnotateFromCatalog(input: $input) { - taskId - } - } - """ - - destination_task_queue = build_destination_task_queue_input( - task_queue_id - ) - data_rows_query = self.build_catalog_query(data_rows) - - predictions_input = ( - build_predictions_input( - validated_params.predictions_ontology_mapping, - validated_params.source_model_run_id, - ) - if validated_params.source_model_run_id - else None - ) - - annotations_input = ( - build_annotations_input( - validated_params.annotations_ontology_mapping, - validated_params.source_project_id, - ) - if validated_params.source_project_id - else None - ) - - res = self.execute( - mutation_str, - { - "input": { - "destinationProjectId": destination_project_id, - "batchInput": { - "batchName": batch_name, - "batchPriority": validated_params.batch_priority, - }, - "destinationTaskQueue": destination_task_queue, - "excludeDataRowsInProject": validated_params.exclude_data_rows_in_project, - "annotationsInput": annotations_input, - "predictionsInput": predictions_input, - "conflictLabelsResolutionStrategy": validated_params.override_existing_annotations_rule, - "searchQuery": {"scope": None, "query": [data_rows_query]}, - "ordering": { - "type": "RANDOM", - "random": {"seed": random.randint(0, 10000)}, - "sorting": None, - }, - "sorting": None, - "limit": None, - } - }, - )["sendToAnnotateFromCatalog"] - - return Entity.Task.get_task(self, res["taskId"]) - - @staticmethod - def build_catalog_query(data_rows: Union[DataRowIds, GlobalKeys]): - """ - Given a list of data rows, builds a query that can be used to fetch the associated data rows from the catalog. 
- - Args: - data_rows: A list of data rows. Can be either UniqueIds or GlobalKeys. - - Returns: A query that can be used to fetch the associated data rows from the catalog. - - """ - if isinstance(data_rows, DataRowIds): - data_rows_query = { - "type": "data_row_id", - "operator": "is", - "ids": list(data_rows), - } - elif isinstance(data_rows, GlobalKeys): - data_rows_query = { - "type": "global_key", - "operator": "is", - "ids": list(data_rows), - } - else: - raise ValueError( - f"Invalid data_rows type {type(data_rows)}. Type of data_rows must be DataRowIds or GlobalKey" - ) - return data_rows_query - - def run_foundry_app( - self, - model_run_name: str, - data_rows: Union[DataRowIds, GlobalKeys], - app_id: str, - ) -> Task: - """ - Run a foundry app - - Args: - model_run_name (str): Name of a new model run to store app predictions in - data_rows (DataRowIds or GlobalKeys): Data row identifiers to run predictions on - app_id (str): Foundry app to run predictions with - """ - foundry_client = FoundryClient(self) - return foundry_client.run_app(model_run_name, data_rows, app_id) - - def create_embedding(self, name: str, dims: int) -> Embedding: - """ - Create a new embedding. You must provide a name and the - number of dimensions the embedding has. Once an - embedding has been created, you can upload the vector - data associated with the embedding id. - - Args: - name: The name of the embedding. - dims: The number of dimensions. - - Returns: - A new Embedding object. - """ - data = self._adv_client.create_embedding(name, dims) - return Embedding(self._adv_client, **data) - - def get_embeddings(self) -> List[Embedding]: - """ - Return a list of all embeddings for the current organization. - - Returns: - A list of embedding objects. - """ - results = self._adv_client.get_embeddings() - return [Embedding(self._adv_client, **data) for data in results] - - def get_embedding_by_id(self, id: str) -> Embedding: - """ - Return the embedding for the provided embedding id. - - Args: - id: The embedding ID. - - Returns: - The embedding object. - """ - data = self._adv_client.get_embedding(id) - return Embedding(self._adv_client, **data) - - def get_embedding_by_name(self, name: str) -> Embedding: - """ - Return the embedding for the provided embedding name. - - Args: - name: The embedding name - - Returns: - The embedding object. - """ - # NB: It's safe to do the filtering client-side as we only allow 10 embeddings per org. - embeddings = self.get_embeddings() - for e in embeddings: - if e.name == name: - return e - raise lbox.exceptions.ResourceNotFoundError(Embedding, dict(name=name)) - - def upsert_label_feedback( - self, label_id: str, feedback: str, scores: Dict[str, float] - ) -> List[LabelScore]: - """ - Submits the label feedback which is a free-form text and numeric - label scores. - - Args: - label_id: Target label ID - feedback: Free text comment regarding the label - scores: A dict of scores, the key is a score name and the value is - the score value - - Returns: - A list of LabelScore instances - """ - mutation_str = """ - mutation UpsertAutoQaLabelFeedbackPyApi( - $labelId: ID! - $feedback: String! - $scores: Json! 
- ) { - upsertAutoQaLabelFeedback( - input: { - labelId: $labelId, - feedback: $feedback, - scores: $scores - } - ) { - id - scores { - id - name - score - } - } - } - """ - res = self.execute( - mutation_str, - {"labelId": label_id, "feedback": feedback, "scores": scores}, - ) - scores_raw = res["upsertAutoQaLabelFeedback"]["scores"] - - return [ - LabelScore(name=x["name"], score=x["score"]) for x in scores_raw - ] - - def get_labeling_service_dashboards( - self, - search_query: Optional[List[SearchFilter]] = None, - ) -> PaginatedCollection: - """ - Get all labeling service dashboards for a given org. - - Optional parameters: - search_query: A list of search filters representing the search - - NOTE: - - Retrieves all projects for the organization or as filtered by the search query - - INCLUDING those not requesting labeling services - - Sorted by project created date in ascending order. - - Examples: - Retrieves all labeling service dashboards for a given workspace id: - >>> workspace_filter = WorkspaceFilter( - >>> operation=OperationType.Workspace, - >>> operator=IdOperator.Is, - >>> values=[workspace_id]) - >>> labeling_service_dashboard = [ - >>> ld for ld in project.client.get_labeling_service_dashboards(search_query=[workspace_filter])] - - Retrieves all labeling service dashboards requested less than 7 days ago: - >>> seven_days_ago = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d") - >>> workforce_requested_filter_before = WorkforceRequestedDateFilter( - >>> operation=OperationType.WorforceRequestedDate, - >>> value=DateValue(operator=RangeDateTimeOperatorWithSingleValue.GreaterThanOrEqual, - >>> value=seven_days_ago)) - >>> labeling_service_dashboard = [ld for ld in project.client.get_labeling_service_dashboards(search_query=[workforce_requested_filter_before])] - - See libs/labelbox/src/labelbox/schema/search_filters.py and libs/labelbox/tests/unit/test_unit_search_filters.py for more examples. - """ - return LabelingServiceDashboard.get_all(self, search_query=search_query) - - def get_task_by_id(self, task_id: str) -> Union[Task, DataUpsertTask]: - """ - Fetches a task by ID. - - Args: - task_id (str): The ID of the task. - - Returns: - Task or DataUpsertTask - - Throws: - ResourceNotFoundError: If the task does not exist. - - NOTE: Export task is not supported yet - """ - user = self.get_user() - query = """ - query GetUserCreatedTasksPyApi($userId: ID!, $taskId: ID!) { - user(where: {id: $userId}) { - createdTasks(where: {id: $taskId} skip: 0 first: 1) { - completionPercentage - createdAt - errors - metadata - name - result - status - type - id - updatedAt - } - } - } - """ - result = self.execute(query, {"userId": user.uid, "taskId": task_id}) - data = result.get("user", {}).get("createdTasks", []) - if not data: - raise lbox.exceptions.ResourceNotFoundError( - message=f"The task {task_id} does not exist." 
- ) - task_data = data[0] - if task_data["type"].lower() == "adv-upsert-data-rows": - task = DataUpsertTask(self, task_data) - else: - task = Task(self, task_data) - - task._user = user - return task From 4be8069f1fdd085a119654c043f95eb747119fb7 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:10:15 -0500 Subject: [PATCH 38/44] Add client back in --- libs/labelbox/src/labelbox/client.py | 2450 ++++++++++++++++++++++++++ 1 file changed, 2450 insertions(+) create mode 100644 libs/labelbox/src/labelbox/client.py diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py new file mode 100644 index 000000000..cccd23be1 --- /dev/null +++ b/libs/labelbox/src/labelbox/client.py @@ -0,0 +1,2450 @@ +# type: ignore +import json +import logging +import mimetypes +import os +import random +import time +import urllib.parse +from collections import defaultdict +from types import MappingProxyType +from typing import Any, Callable, Dict, List, Optional, Union, overload + +import lbox.exceptions +import requests +import requests.exceptions +from google.api_core import retry +from lbox.request_client import RequestClient + +from labelbox import __version__ as SDK_VERSION +from labelbox import utils +from labelbox.adv_client import AdvClient +from labelbox.orm import query +from labelbox.orm.db_object import DbObject +from labelbox.orm.model import Entity, Field +from labelbox.pagination import PaginatedCollection +from labelbox.schema import role +from labelbox.schema.catalog import Catalog +from labelbox.schema.data_row import DataRow +from labelbox.schema.data_row_metadata import DataRowMetadataOntology +from labelbox.schema.dataset import Dataset +from labelbox.schema.embedding import Embedding +from labelbox.schema.enums import CollectionJobStatus +from labelbox.schema.foundry.foundry_client import FoundryClient +from labelbox.schema.iam_integration import IAMIntegration +from labelbox.schema.identifiables import DataRowIds, GlobalKeys +from labelbox.schema.label_score import LabelScore +from labelbox.schema.labeling_frontend import LabelingFrontend +from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard +from labelbox.schema.media_type import ( + MediaType, + get_media_type_validation_error, +) +from labelbox.schema.model import Model +from labelbox.schema.model_config import ModelConfig +from labelbox.schema.model_run import ModelRun +from labelbox.schema.ontology import ( + Classification, + DeleteFeatureFromOntologyResult, + FeatureSchema, + Ontology, + PromptResponseClassification, + Tool, +) +from labelbox.schema.ontology_kind import ( + EditorTaskType, + EditorTaskTypeMapper, + OntologyKind, +) +from labelbox.schema.organization import Organization +from labelbox.schema.project import Project +from labelbox.schema.quality_mode import ( + BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS, + BENCHMARK_AUTO_AUDIT_PERCENTAGE, + CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS, + CONSENSUS_AUTO_AUDIT_PERCENTAGE, + QualityMode, +) +from labelbox.schema.queue_mode import QueueMode +from labelbox.schema.role import Role +from labelbox.schema.search_filters import SearchFilter +from labelbox.schema.send_to_annotate_params import ( + SendToAnnotateFromCatalogParams, + build_annotations_input, + build_destination_task_queue_input, + build_predictions_input, +) +from labelbox.schema.slice import CatalogSlice, ModelSlice +from labelbox.schema.task import DataUpsertTask, Task +from labelbox.schema.user import User + 
+
+logger = logging.getLogger(__name__)
+
+
+class Client:
+    """A Labelbox client.
+
+    Provides functions for querying and creating
+    top-level data objects (Projects, Datasets).
+    """
+
+    def __init__(
+        self,
+        api_key=None,
+        endpoint="https://api.labelbox.com/graphql",
+        enable_experimental=False,
+        app_url="https://app.labelbox.com",
+        rest_endpoint="https://api.labelbox.com/api/v1",
+    ):
+        """Creates and initializes a Labelbox Client.
+
+        Logging is defaulted to level WARNING. To receive more verbose
+        output to console, update `logging.level` to the appropriate level.
+
+        >>> logging.basicConfig(level = logging.INFO)
+        >>> client = Client("<APIKEY>")
+
+        Args:
+            api_key (str): API key. If None, the key is obtained from the "LABELBOX_API_KEY" environment variable.
+            endpoint (str): URL of the Labelbox server to connect to.
+            enable_experimental (bool): Indicates whether or not to use experimental features
+            app_url (str): Host URL for all links to the web app
+        Raises:
+            lbox.exceptions.AuthenticationError: If no `api_key`
+                is provided as an argument or via the environment
+                variable.
+        """
+        self._data_row_metadata_ontology = None
+        self._request_client = RequestClient(
+            sdk_version=SDK_VERSION,
+            api_key=api_key,
+            endpoint=endpoint,
+            enable_experimental=enable_experimental,
+            app_url=app_url,
+            rest_endpoint=rest_endpoint,
+        )
+        self._adv_client = AdvClient.factory(rest_endpoint, api_key)
+
+    @property
+    def headers(self) -> MappingProxyType:
+        return self._request_client.headers
+
+    @property
+    def connection(self) -> requests.Session:
+        return self._request_client._connection
+
+    @property
+    def endpoint(self) -> str:
+        return self._request_client.endpoint
+
+    @property
+    def rest_endpoint(self) -> str:
+        return self._request_client.rest_endpoint
+
+    @property
+    def enable_experimental(self) -> bool:
+        return self._request_client.enable_experimental
+
+    @property
+    def app_url(self) -> str:
+        return self._request_client.app_url
+
+    def execute(
+        self,
+        query=None,
+        params=None,
+        data=None,
+        files=None,
+        timeout=60.0,
+        experimental=False,
+        error_log_key="message",
+        raise_return_resource_not_found=False,
+        error_handlers: Optional[
+            Dict[str, Callable[[requests.models.Response], None]]
+        ] = None,
+    ) -> Dict[str, Any]:
+        """Executes a GraphQL query.
+
+        Args:
+            query (str): The query to execute.
+            params (dict): Variables to pass to the query.
+            raise_return_resource_not_found (bool): If True, raise a
+                ResourceNotFoundError if the query returns None.
+            error_handlers (dict): A dictionary mapping graphql error code to handler functions.
+                Allows a caller to handle specific errors in a custom way or produce more user-friendly, readable messages
+
+        Returns:
+            dict: The response from the server.
+        """
+        return self._request_client.execute(
+            query,
+            params,
+            data=data,
+            files=files,
+            timeout=timeout,
+            experimental=experimental,
+            error_log_key=error_log_key,
+            raise_return_resource_not_found=raise_return_resource_not_found,
+            error_handlers=error_handlers,
+        )
+
+    def upload_file(self, path: str) -> str:
+        """Uploads the file at the given local path.
+
+        Also includes a best guess at the content type of the file.
+
+        Args:
+            path (str): Path to the local file to be uploaded.
+        Returns:
+            str, the URL of uploaded data.
+        Raises:
+            lbox.exceptions.LabelboxError: If upload failed.
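+        Example:
+            A minimal sketch; the file path below is hypothetical:
+            >>> url = client.upload_file("./image.png")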
+ """ + content_type, _ = mimetypes.guess_type(path) + filename = os.path.basename(path) + with open(path, "rb") as f: + return self.upload_data( + content=f.read(), filename=filename, content_type=content_type + ) + + @retry.Retry( + predicate=retry.if_exception_type(lbox.exceptions.InternalServerError) + ) + def upload_data( + self, + content: bytes, + filename: str = None, + content_type: str = None, + sign: bool = False, + ) -> str: + """Uploads the given data (bytes) to Labelbox. + + Args: + content: bytestring to upload + filename: name of the upload + content_type: content type of data uploaded + sign: whether or not to sign the url + + Returns: + str, the URL of uploaded data. + + Raises: + lbox.exceptions.LabelboxError: If upload failed. + """ + + request_data = { + "operations": json.dumps( + { + "variables": { + "file": None, + "contentLength": len(content), + "sign": sign, + }, + "query": """mutation UploadFile($file: Upload!, $contentLength: Int!, + $sign: Boolean) { + uploadFile(file: $file, contentLength: $contentLength, + sign: $sign) {url filename} } """, + } + ), + "map": (None, json.dumps({"1": ["variables.file"]})), + } + + files = { + "1": (filename, content, content_type) + if (filename and content_type) + else content + } + headers = self.connection.headers.copy() + headers.pop("Content-Type", None) + request = requests.Request( + "POST", + self.endpoint, + headers=headers, + data=request_data, + files=files, + ) + + prepped: requests.PreparedRequest = request.prepare() + + response = self.connection.send(prepped) + + if response.status_code == 502: + error_502 = "502 Bad Gateway" + raise lbox.exceptions.InternalServerError(error_502) + elif response.status_code == 503: + raise lbox.exceptions.InternalServerError(response.text) + elif response.status_code == 520: + raise lbox.exceptions.InternalServerError(response.text) + + try: + file_data = response.json().get("data", None) + except ValueError as e: # response is not valid JSON + raise lbox.exceptions.LabelboxError( + "Failed to upload, unknown cause", e + ) + + if not file_data or not file_data.get("uploadFile", None): + try: + errors = response.json().get("errors", []) + error_msg = next(iter(errors), {}).get( + "message", "Unknown error" + ) + except Exception: + error_msg = "Unknown error" + raise lbox.exceptions.LabelboxError( + "Failed to upload, message: %s" % error_msg + ) + + return file_data["uploadFile"]["url"] + + def _get_single(self, db_object_type, uid): + """Fetches a single object of the given type, for the given ID. + + Args: + db_object_type (type): DbObject subclass. + uid (str): Unique ID of the row. + Returns: + Object of `db_object_type`. + Raises: + lbox.exceptions.ResourceNotFoundError: If there is no object + of the given type for the given ID. + """ + query_str, params = query.get_single(db_object_type, uid) + + res = self.execute(query_str, params) + res = res and res.get(utils.camel_case(db_object_type.type_name())) + if res is None: + raise lbox.exceptions.ResourceNotFoundError(db_object_type, params) + else: + return db_object_type(self, res) + + def get_project(self, project_id) -> Project: + """Gets a single Project with the given ID. + + >>> project = client.get_project("") + + Args: + project_id (str): Unique ID of the Project. + Returns: + The sought Project. + Raises: + lbox.exceptions.ResourceNotFoundError: If there is no + Project with the given ID. 
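+        Example:
+            A hedged sketch of guarding against a missing ID; the id string is a placeholder:
+            >>> try:
+            ...     project = client.get_project("<project_id>")
+            ... except lbox.exceptions.ResourceNotFoundError:
+            ...     project = None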
+ """ + return self._get_single(Entity.Project, project_id) + + def get_dataset(self, dataset_id) -> Dataset: + """Gets a single Dataset with the given ID. + + >>> dataset = client.get_dataset("") + + Args: + dataset_id (str): Unique ID of the Dataset. + Returns: + The sought Dataset. + Raises: + lbox.exceptions.ResourceNotFoundError: If there is no + Dataset with the given ID. + """ + return self._get_single(Entity.Dataset, dataset_id) + + def get_user(self) -> User: + """Gets the current User database object. + + >>> user = client.get_user() + """ + return self._get_single(Entity.User, None) + + def get_organization(self) -> Organization: + """Gets the Organization DB object of the current user. + + >>> organization = client.get_organization() + """ + return self._get_single(Entity.Organization, None) + + def _get_all(self, db_object_type, where, filter_deleted=True): + """Fetches all the objects of the given type the user has access to. + + Args: + db_object_type (type): DbObject subclass. + where (Comparison, LogicalOperation or None): The `where` clause + for filtering. + Returns: + An iterable of `db_object_type` instances. + """ + if filter_deleted: + not_deleted = db_object_type.deleted == False # noqa: E712 Needed for bit operator to combine comparisons + where = not_deleted if where is None else where & not_deleted + query_str, params = query.get_all(db_object_type, where) + + return PaginatedCollection( + self, + query_str, + params, + [utils.camel_case(db_object_type.type_name()) + "s"], + db_object_type, + ) + + def get_projects(self, where=None) -> PaginatedCollection: + """Fetches all the projects the user has access to. + + >>> projects = client.get_projects(where=(Project.name == "") & (Project.description == "")) + + Args: + where (Comparison, LogicalOperation or None): The `where` clause + for filtering. + Returns: + PaginatedCollection of all projects the user has access to or projects matching the criteria specified. + """ + return self._get_all(Entity.Project, where) + + def get_users(self, where=None) -> PaginatedCollection: + """Fetches all the users. + + >>> users = client.get_users(where=User.email == "") + + Args: + where (Comparison, LogicalOperation or None): The `where` clause + for filtering. + Returns: + An iterable of Users (typically a PaginatedCollection). + """ + return self._get_all(Entity.User, where, filter_deleted=False) + + def get_datasets(self, where=None) -> PaginatedCollection: + """Fetches one or more datasets. + + >>> datasets = client.get_datasets(where=(Dataset.name == "") & (Dataset.description == "")) + + Args: + where (Comparison, LogicalOperation or None): The `where` clause + for filtering. + Returns: + PaginatedCollection of all datasets the user has access to or datasets matching the criteria specified. + """ + return self._get_all(Entity.Dataset, where) + + def get_labeling_frontends(self, where=None) -> List[LabelingFrontend]: + """Fetches all the labeling frontends. + + >>> frontend = client.get_labeling_frontends(where=LabelingFrontend.name == "Editor") + + Args: + where (Comparison, LogicalOperation or None): The `where` clause + for filtering. + Returns: + An iterable of LabelingFrontends (typically a PaginatedCollection). + """ + return self._get_all(Entity.LabelingFrontend, where) + + def _create(self, db_object_type, data, extra_params={}): + """Creates an object on the server. Attribute values are + passed as keyword arguments: + + Args: + db_object_type (type): A DbObjectType subtype. 
+ data (dict): Keys are attributes or their names (in Python, + snake-case convention) and values are desired attribute values. + extra_params (dict): Additional parameters to pass to GraphQL. + These have to be Field(...): value pairs. + Returns: + A new object of the given DB object type. + Raises: + InvalidAttributeError: If the DB object type does not contain + any of the attribute names given in `data`. + """ + # Convert string attribute names to Field or Relationship objects. + # Also convert Labelbox object values to their UIDs. + data = { + db_object_type.attribute(attr) + if isinstance(attr, str) + else attr: value.uid if isinstance(value, DbObject) else value + for attr, value in data.items() + } + + data = {**data, **extra_params} + query_string, params = query.create(db_object_type, data) + res = self.execute( + query_string, params, raise_return_resource_not_found=True + ) + + if not res: + raise lbox.exceptions.LabelboxError( + "Failed to create %s" % db_object_type.type_name() + ) + res = res["create%s" % db_object_type.type_name()] + + return db_object_type(self, res) + + def create_model_config( + self, name: str, model_id: str, inference_params: dict + ) -> ModelConfig: + """Creates a new model config with the given params. + Model configs are scoped to organizations, and can be reused between projects. + + Args: + name (str): Name of the model config + model_id (str): ID of model to configure + inference_params (dict): JSON of model configuration parameters. + + Returns: + str, id of the created model config + """ + if not name: + raise ValueError("Model config name must not be an empty string.") + + query = """mutation CreateModelConfigPyApi($modelId: ID!, $inferenceParams: Json!, $name: String!) { + createModelConfig(input: {modelId: $modelId, inferenceParams: $inferenceParams, name: $name}) { + modelId + inferenceParams + id + name + } + }""" + params = { + "modelId": model_id, + "inferenceParams": inference_params, + "name": name, + } + result = self.execute(query, params) + return ModelConfig(self, result["createModelConfig"]) + + def delete_model_config(self, id: str) -> bool: + """Deletes an existing model config with the given id + + Args: + id (str): ID of existing model config + + Returns: + bool, indicates if the operation was a success. + """ + + query = """mutation DeleteModelConfigPyApi($id: ID!) { + deleteModelConfig(input: {id: $id}) { + success + } + }""" + params = {"id": id} + result = self.execute(query, params) + if not result: + raise lbox.exceptions.ResourceNotFoundError( + Entity.ModelConfig, params + ) + return result["deleteModelConfig"]["success"] + + def create_dataset( + self, iam_integration=IAMIntegration._DEFAULT, **kwargs + ) -> Dataset: + """Creates a Dataset object on the server. + + Attribute values are passed as keyword arguments. + + Args: + iam_integration (IAMIntegration) : Uses the default integration. + Optionally specify another integration or set as None to not use delegated access + **kwargs: Keyword arguments with Dataset attribute values. + Returns: + A new Dataset object. + Raises: + InvalidAttributeError: If the Dataset type does not contain + any of the attribute names given in kwargs. 
+        Examples:
+            Create a dataset
+                >>> dataset = client.create_dataset(name="<dataset_name>")
+            Create a dataset with description
+                >>> dataset = client.create_dataset(name="<dataset_name>", description="<dataset_description>")
+        """
+        dataset = self._create(Entity.Dataset, kwargs)
+        if iam_integration == IAMIntegration._DEFAULT:
+            iam_integration = (
+                self.get_organization().get_default_iam_integration()
+            )
+
+        if iam_integration is None:
+            return dataset
+
+        try:
+            if not isinstance(iam_integration, IAMIntegration):
+                raise TypeError(
+                    f"iam_integration must be a reference to an `IAMIntegration` object. Found {type(iam_integration)}"
+                )
+
+            if not iam_integration.valid:
+                raise ValueError(
+                    "Integration is not valid. Please select another."
+                )
+
+            self.execute(
+                """mutation setSignerForDatasetPyApi($signerId: ID!, $datasetId: ID!) {
+                    setSignerForDataset(data: { signerId: $signerId}, where: {id: $datasetId}){id}}
+                """,
+                {"signerId": iam_integration.uid, "datasetId": dataset.uid},
+            )
+            validation_result = self.execute(
+                """mutation validateDatasetPyApi($id: ID!){validateDataset(where: {id : $id}){
+                    valid checks{name, success}}}
+                """,
+                {"id": dataset.uid},
+            )
+
+            if not validation_result["validateDataset"]["valid"]:
+                raise lbox.exceptions.LabelboxError(
+                    "IAMIntegration was not successfully added to the dataset."
+                )
+        except Exception as e:
+            dataset.delete()
+            raise e
+        return dataset
+
+    def create_project(self, **kwargs) -> Project:
+        """Creates a Project object on the server.
+
+        Attribute values are passed as keyword arguments.
+
+        >>> project = client.create_project(
+                name="<project_name>",
+                description="<project_description>",
+                media_type=MediaType.Image,
+                queue_mode=QueueMode.Batch
+            )
+
+        Args:
+            name (str): A name for the project
+            description (str): A short summary for the project
+            media_type (MediaType): The type of assets that this project will accept
+            queue_mode (Optional[QueueMode]): The queue mode to use
+            quality_mode (Optional[QualityMode]): The quality mode to use (e.g. Benchmark, Consensus). Defaults to
+                Benchmark
+            quality_modes (Optional[List[QualityMode]]): The quality modes to use (e.g. Benchmark, Consensus). Defaults to
+                Benchmark.
+        Returns:
+            A new Project object.
+        Raises:
+            InvalidAttributeError: If the Project type does not contain
+                any of the attribute names given in kwargs.
+
+        NOTE: the following attributes are used only in chat model evaluation projects:
+        dataset_name_or_id, append_to_existing_dataset, data_row_count, editor_task_type
+        They are not used for general projects and are not supported in this method.
+        """
+        # The following arguments are not supported for general projects, only for chat model evaluation projects
+        kwargs.pop("dataset_name_or_id", None)
+        kwargs.pop("append_to_existing_dataset", None)
+        kwargs.pop("data_row_count", None)
+        kwargs.pop("editor_task_type", None)
+        return self._create_project(**kwargs)
+
+    @overload
+    def create_model_evaluation_project(
+        self,
+        dataset_name: str,
+        dataset_id: str = None,
+        data_row_count: int = 100,
+        **kwargs,
+    ) -> Project:
+        pass
+
+    @overload
+    def create_model_evaluation_project(
+        self,
+        dataset_id: str,
+        dataset_name: str = None,
+        data_row_count: int = 100,
+        **kwargs,
+    ) -> Project:
+        pass
+
+    def create_model_evaluation_project(
+        self,
+        dataset_id: Optional[str] = None,
+        dataset_name: Optional[str] = None,
+        data_row_count: int = 100,
+        **kwargs,
+    ) -> Project:
+        """
+        Use this method exclusively to create a chat model evaluation project.
+        Args:
+            dataset_name: When creating a new dataset, pass the name
+            dataset_id: When using an existing dataset, pass the id
+            data_row_count: The number of data row assets to use for the project
+            **kwargs: Additional parameters to pass to the create_project method
+        Returns:
+            Project: The created project
+
+        Examples:
+            >>> client.create_model_evaluation_project(name=project_name, dataset_name="new data set")
+            >>> This creates a new dataset with a default number of rows (100), creates a new project, and assigns a batch of the newly created data rows to the project.
+
+            >>> client.create_model_evaluation_project(name=project_name, dataset_name="new data set", data_row_count=10)
+            >>> This creates a new dataset with 10 data rows, creates a new project, and assigns a batch of the newly created data rows to the project.
+
+            >>> client.create_model_evaluation_project(name=project_name, dataset_id="clr00u8j0j0j0")
+            >>> This creates a new project, adds 100 data rows to the dataset with id "clr00u8j0j0j0", and assigns a batch of the newly created data rows to the project.
+
+            >>> client.create_model_evaluation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10)
+            >>> This creates a new project, adds 10 data rows to the dataset with id "clr00u8j0j0j0", and assigns a batch of the newly created 10 data rows to the project.
+
+        """
+        if not dataset_id and not dataset_name:
+            raise ValueError(
+                "dataset_name or dataset_id must be present and not be an empty string."
+            )
+        if data_row_count <= 0:
+            raise ValueError("data_row_count must be a positive integer.")
+
+        if dataset_id:
+            append_to_existing_dataset = True
+            dataset_name_or_id = dataset_id
+        else:
+            append_to_existing_dataset = False
+            dataset_name_or_id = dataset_name
+
+        kwargs["media_type"] = MediaType.Conversational
+        kwargs["dataset_name_or_id"] = dataset_name_or_id
+        kwargs["append_to_existing_dataset"] = append_to_existing_dataset
+        kwargs["data_row_count"] = data_row_count
+        kwargs["editor_task_type"] = EditorTaskType.ModelChatEvaluation.value
+
+        return self._create_project(**kwargs)
+
+    def create_offline_model_evaluation_project(self, **kwargs) -> Project:
+        """
+        Creates a project for offline model evaluation.
+        Args:
+            **kwargs: Additional parameters to pass; see the create_project method
+        Returns:
+            Project: The created project
+        """
+        kwargs["media_type"] = (
+            MediaType.Conversational
+        )  # Only Conversational is supported
+        kwargs["editor_task_type"] = (
+            EditorTaskType.OfflineModelChatEvaluation.value
+        )  # Special editor task type for offline model evaluation
+
+        # The following arguments are not supported for offline model evaluation
+        kwargs.pop("dataset_name_or_id", None)
+        kwargs.pop("append_to_existing_dataset", None)
+        kwargs.pop("data_row_count", None)
+
+        return self._create_project(**kwargs)
+
+    def create_prompt_response_generation_project(
+        self,
+        dataset_id: Optional[str] = None,
+        dataset_name: Optional[str] = None,
+        data_row_count: int = 100,
+        **kwargs,
+    ) -> Project:
+        """
+        Use this method exclusively to create a prompt and response generation project.
+
+        Args:
+            dataset_name: When creating a new dataset, pass the name
+            dataset_id: When using an existing dataset, pass the id
+            data_row_count: The number of data row assets to use for the project
+            **kwargs: Additional parameters to pass; see the create_project method
+        Returns:
+            Project: The created project
+
+        NOTE: Only a dataset_name or dataset_id should be included
+
+        Examples:
+            >>> client.create_prompt_response_generation_project(name=project_name, dataset_name="new data set", media_type=MediaType.LLMPromptResponseCreation)
+            >>> This creates a new dataset with a default number of rows (100), creates a new prompt and response creation project, and assigns a batch of the newly created data rows to the project.
+
+            >>> client.create_prompt_response_generation_project(name=project_name, dataset_name="new data set", data_row_count=10, media_type=MediaType.LLMPromptCreation)
+            >>> This creates a new dataset with 10 data rows, creates a new prompt creation project, and assigns a batch of the newly created data rows to the project.
+
+            >>> client.create_prompt_response_generation_project(name=project_name, dataset_id="clr00u8j0j0j0", media_type=MediaType.LLMPromptCreation)
+            >>> This creates a new prompt creation project, adds 100 data rows to the dataset with id "clr00u8j0j0j0", and assigns a batch of the newly created data rows to the project.
+
+            >>> client.create_prompt_response_generation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10, media_type=MediaType.LLMPromptResponseCreation)
+            >>> This creates a new prompt and response creation project, adds 10 data rows to the dataset with id "clr00u8j0j0j0", and assigns a batch of the newly created 10 data rows to the project.
+
+        """
+        if not dataset_id and not dataset_name:
+            raise ValueError(
+                "dataset_name or dataset_id must be present and not be an empty string."
+            )
+
+        if dataset_id and dataset_name:
+            raise ValueError(
+                "Only provide a dataset_name or dataset_id, not both."
+            )
+
+        if data_row_count <= 0:
+            raise ValueError("data_row_count must be a positive integer.")
+
+        if dataset_id:
+            append_to_existing_dataset = True
+            dataset_name_or_id = dataset_id
+        else:
+            append_to_existing_dataset = False
+            dataset_name_or_id = dataset_name
+
+        if "media_type" in kwargs and kwargs.get("media_type") not in [
+            MediaType.LLMPromptCreation,
+            MediaType.LLMPromptResponseCreation,
+        ]:
+            raise ValueError(
+                "media_type must be either LLMPromptCreation or LLMPromptResponseCreation"
+            )
+
+        kwargs["dataset_name_or_id"] = dataset_name_or_id
+        kwargs["append_to_existing_dataset"] = append_to_existing_dataset
+        kwargs["data_row_count"] = data_row_count
+
+        kwargs.pop("editor_task_type", None)
+
+        return self._create_project(**kwargs)
+
+    def create_response_creation_project(self, **kwargs) -> Project:
+        """
+        Creates a project for response creation.
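+
+        Example (a minimal sketch; the project name is a placeholder):
+            >>> project = client.create_response_creation_project(name="<project_name>")
+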
+        Args:
+            **kwargs: Additional parameters to pass; see the create_project method
+        Returns:
+            Project: The created project
+        """
+        kwargs["media_type"] = MediaType.Text  # Only Text is supported
+        kwargs["editor_task_type"] = (
+            EditorTaskType.ResponseCreation.value
+        )  # Special editor task type for response creation projects
+
+        # The following arguments are not supported for response creation projects
+        kwargs.pop("dataset_name_or_id", None)
+        kwargs.pop("append_to_existing_dataset", None)
+        kwargs.pop("data_row_count", None)
+
+        return self._create_project(**kwargs)
+
+    def _create_project(self, **kwargs) -> Project:
+        auto_audit_percentage = kwargs.get("auto_audit_percentage")
+        auto_audit_number_of_labels = kwargs.get("auto_audit_number_of_labels")
+        if (
+            auto_audit_percentage is not None
+            or auto_audit_number_of_labels is not None
+        ):
+            raise ValueError(
+                "quality_modes must be set instead of auto_audit_percentage or auto_audit_number_of_labels."
+            )
+
+        name = kwargs.get("name")
+        if name is None or not name.strip():
+            raise ValueError("project name must be a valid string.")
+
+        queue_mode = kwargs.get("queue_mode")
+        if queue_mode is QueueMode.Dataset:
+            raise ValueError(
+                "Dataset queue mode is deprecated. Please prefer Batch queue mode."
+            )
+        elif queue_mode is QueueMode.Batch:
+            logger.warning(
+                "Passing a queue mode of batch is redundant and will soon no longer be supported."
+            )
+
+        media_type = kwargs.get("media_type")
+        if media_type and MediaType.is_supported(media_type):
+            media_type_value = media_type.value
+        elif media_type:
+            raise TypeError(
+                f"{media_type} is not a valid media type. Use"
+                f" any of {MediaType.get_supported_members()}"
+                " from MediaType. Example: MediaType.Image."
+            )
+        else:
+            logger.warning(
+                "Creating a project without specifying media_type"
+                " through this method will soon no longer be supported."
+            )
+            media_type_value = None
+
+        quality_modes = kwargs.get("quality_modes")
+        quality_mode = kwargs.get("quality_mode")
+        if quality_mode:
+            logger.warning(
+                "Passing quality_mode is deprecated and will soon no longer be supported. Use quality_modes instead."
+            )
+
+        if quality_modes and quality_mode:
+            raise ValueError(
+                "Cannot use both quality_modes and quality_mode at the same time. Use one or the other."
+ ) + + if not quality_modes and not quality_mode: + logger.info("Defaulting quality modes to Benchmark and Consensus.") + + data = kwargs + data.pop("quality_modes", None) + data.pop("quality_mode", None) + + # check if quality_modes is a set, if not, convert to set + quality_modes_set = quality_modes + if quality_modes and not isinstance(quality_modes, set): + quality_modes_set = set(quality_modes) + if quality_mode: + quality_modes_set = {quality_mode} + + if ( + quality_modes_set is None + or len(quality_modes_set) == 0 + or quality_modes_set + == {QualityMode.Benchmark, QualityMode.Consensus} + ): + data["auto_audit_number_of_labels"] = ( + CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS + ) + data["auto_audit_percentage"] = CONSENSUS_AUTO_AUDIT_PERCENTAGE + data["is_benchmark_enabled"] = True + data["is_consensus_enabled"] = True + elif quality_modes_set == {QualityMode.Benchmark}: + data["auto_audit_number_of_labels"] = ( + BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS + ) + data["auto_audit_percentage"] = BENCHMARK_AUTO_AUDIT_PERCENTAGE + data["is_benchmark_enabled"] = True + elif quality_modes_set == {QualityMode.Consensus}: + data["auto_audit_number_of_labels"] = ( + CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS + ) + data["auto_audit_percentage"] = CONSENSUS_AUTO_AUDIT_PERCENTAGE + data["is_consensus_enabled"] = True + else: + raise ValueError( + f"{quality_modes_set} is not a valid quality modes set. Allowed values are [Benchmark, Consensus]" + ) + + params = {**data} + if media_type_value: + params["media_type"] = media_type_value + + extra_params = { + Field.String("dataset_name_or_id"): params.pop( + "dataset_name_or_id", None + ), + Field.Boolean("append_to_existing_dataset"): params.pop( + "append_to_existing_dataset", None + ), + } + extra_params = {k: v for k, v in extra_params.items() if v is not None} + return self._create(Entity.Project, params, extra_params) + + def get_roles(self) -> List[Role]: + """ + Returns: + Roles: Provides information on available roles within an organization. + Roles are used for user management. + """ + return role.get_roles(self) + + def get_data_row(self, data_row_id): + """ + + Returns: + DataRow: returns a single data row given the data row id + """ + + return self._get_single(Entity.DataRow, data_row_id) + + def get_data_row_by_global_key(self, global_key: str) -> DataRow: + """ + Returns: DataRow: returns a single data row given the global key + """ + res = self.get_data_row_ids_for_global_keys([global_key]) + if res["status"] != "SUCCESS": + raise lbox.exceptions.ResourceNotFoundError( + Entity.DataRow, {global_key: global_key} + ) + data_row_id = res["results"][0] + + return self.get_data_row(data_row_id) + + def get_data_row_metadata_ontology(self) -> DataRowMetadataOntology: + """ + + Returns: + DataRowMetadataOntology: The ontology for Data Row Metadata for an organization + + """ + if self._data_row_metadata_ontology is None: + self._data_row_metadata_ontology = DataRowMetadataOntology(self) + return self._data_row_metadata_ontology + + def get_model(self, model_id) -> Model: + """Gets a single Model with the given ID. + + >>> model = client.get_model("") + + Args: + model_id (str): Unique ID of the Model. + Returns: + The sought Model. + Raises: + lbox.exceptions.ResourceNotFoundError: If there is no + Model with the given ID. + """ + return self._get_single(Entity.Model, model_id) + + def get_models(self, where=None) -> List[Model]: + """Fetches all the models the user has access to. 
+
+        >>> models = client.get_models(where=(Model.name == "<model_name>"))
+
+        Args:
+            where (Comparison, LogicalOperation or None): The `where` clause
+                for filtering.
+        Returns:
+            An iterable of Models (typically a PaginatedCollection).
+        """
+        return self._get_all(Entity.Model, where, filter_deleted=False)
+
+    def create_model(self, name, ontology_id) -> Model:
+        """Creates a Model object on the server.
+
+        >>> model = client.create_model(<model_name>, <ontology_id>)
+
+        Args:
+            name (string): Name of the model
+            ontology_id (string): ID of the related ontology
+        Returns:
+            A new Model object.
+        Raises:
+            InvalidAttributeError: If the Model type does not contain
+                any of the attribute names given in kwargs.
+        """
+        query_str = """mutation createModelPyApi($name: String!, $ontologyId: ID!){
+            createModel(data: {name : $name, ontologyId : $ontologyId}){
+                %s
+            }
+        }""" % query.results_query_part(Entity.Model)
+
+        result = self.execute(
+            query_str, {"name": name, "ontologyId": ontology_id}
+        )
+        return Entity.Model(self, result["createModel"])
+
+    def get_data_row_ids_for_external_ids(
+        self, external_ids: List[str]
+    ) -> Dict[str, List[str]]:
+        """
+        Returns the data row ids that correspond to a list of external ids.
+        There is a max of 1500 items returned at a time.
+
+        Args:
+            external_ids: List of external ids to fetch data row ids for
+        Returns:
+            A dict of external ids as keys and values as a list of data row ids that correspond to that external id.
+        """
+        query_str = """query externalIdsToDataRowIdsPyApi($externalId_in: [String!]!){
+            externalIdsToDataRowIds(externalId_in: $externalId_in) { dataRowId externalId }
+        }
+        """
+        max_ids_per_request = 100
+        result = defaultdict(list)
+        for i in range(0, len(external_ids), max_ids_per_request):
+            for row in self.execute(
+                query_str,
+                {"externalId_in": external_ids[i : i + max_ids_per_request]},
+            )["externalIdsToDataRowIds"]:
+                result[row["externalId"]].append(row["dataRowId"])
+        return result
+
+    def get_ontology(self, ontology_id) -> Ontology:
+        """
+        Fetches an Ontology by id.
+
+        Args:
+            ontology_id (str): The id of the ontology to query for
+        Returns:
+            Ontology
+        """
+        return self._get_single(Entity.Ontology, ontology_id)
+
+    def get_ontologies(self, name_contains) -> PaginatedCollection:
+        """
+        Fetches all ontologies with names that match the name_contains string.
+
+        Args:
+            name_contains (str): the string to search ontology names by
+        Returns:
+            PaginatedCollection of Ontologies with names that match `name_contains`
+        """
+        query_str = """query getOntologiesPyApi($search: String, $filter: OntologyFilter, $from : String, $first: PageSize){
+            ontologies(where: {filter: $filter, search: $search}, after: $from, first: $first){
+                nodes {%s}
+                nextCursor
+            }
+        }
+        """ % query.results_query_part(Entity.Ontology)
+        params = {"search": name_contains, "filter": {"status": "ALL"}}
+        return PaginatedCollection(
+            self,
+            query_str,
+            params,
+            ["ontologies", "nodes"],
+            Entity.Ontology,
+            ["ontologies", "nextCursor"],
+        )
+
+    def get_feature_schema(self, feature_schema_id):
+        """
+        Fetches a feature schema. Only supports top level feature schemas.
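+
+        Example (a hedged sketch; the id is a placeholder):
+            >>> feature_schema = client.get_feature_schema("<feature_schema_id>")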
+ + Args: + feature_schema_id (str): The id of the feature schema to query for + Returns: + FeatureSchema + """ + + query_str = """query rootSchemaNodePyApi($rootSchemaNodeWhere: RootSchemaNodeWhere!){ + rootSchemaNode(where: $rootSchemaNodeWhere){%s} + }""" % query.results_query_part(Entity.FeatureSchema) + + res = self.execute( + query_str, + {"rootSchemaNodeWhere": {"featureSchemaId": feature_schema_id}}, + )["rootSchemaNode"] + res["id"] = res["normalized"]["featureSchemaId"] + return Entity.FeatureSchema(self, res) + + def get_feature_schemas(self, name_contains) -> PaginatedCollection: + """ + Fetches top level feature schemas with names that match the `name_contains` string + + Args: + name_contains (str): search filter for a name of a root feature schema + If present, results in a case insensitive 'like' search for feature schemas + If None, returns all top level feature schemas + Returns: + PaginatedCollection of FeatureSchemas with names that match `name_contains` + """ + query_str = """query rootSchemaNodesPyApi($search: String, $filter: RootSchemaNodeFilter, $from : String, $first: PageSize){ + rootSchemaNodes(where: {filter: $filter, search: $search}, after: $from, first: $first){ + nodes {%s} + nextCursor + } + } + """ % query.results_query_part(Entity.FeatureSchema) + params = {"search": name_contains, "filter": {"status": "ALL"}} + + def rootSchemaPayloadToFeatureSchema(client, payload): + # Technically we are querying for a Schema Node. + # But the features are the same so we just grab the feature schema id + payload["id"] = payload["normalized"]["featureSchemaId"] + return Entity.FeatureSchema(client, payload) + + return PaginatedCollection( + self, + query_str, + params, + ["rootSchemaNodes", "nodes"], + rootSchemaPayloadToFeatureSchema, + ["rootSchemaNodes", "nextCursor"], + ) + + def create_ontology_from_feature_schemas( + self, + name, + feature_schema_ids, + media_type: MediaType = None, + ontology_kind: OntologyKind = None, + ) -> Ontology: + """ + Creates an ontology from a list of feature schema ids + + Args: + name (str): Name of the ontology + feature_schema_ids (List[str]): List of feature schema ids corresponding to + top level tools and classifications to include in the ontology + media_type (MediaType or None): Media type of a new ontology. + ontology_kind (OntologyKind or None): set to OntologyKind.ModelEvaluation if the ontology is for chat evaluation, + leave as None otherwise. + Returns: + The created Ontology + + NOTE for chat evaluation, we currently force media_type to Conversational and for response creation, we force media_type to Text. + """ + tools, classifications = [], [] + for feature_schema_id in feature_schema_ids: + feature_schema = self.get_feature_schema(feature_schema_id) + tool = ["tool"] + if "tool" in feature_schema.normalized: + tool = feature_schema.normalized["tool"] + try: + Tool.Type(tool) + tools.append(feature_schema.normalized) + except ValueError: + raise ValueError( + f"Tool `{tool}` not in list of supported tools." 
+                    )
+            elif "type" in feature_schema.normalized:
+                classification = feature_schema.normalized["type"]
+                if (
+                    classification
+                    in Classification.Type._value2member_map_.keys()
+                ):
+                    Classification.Type(classification)
+                    classifications.append(feature_schema.normalized)
+                elif (
+                    classification
+                    in PromptResponseClassification.Type._value2member_map_.keys()
+                ):
+                    PromptResponseClassification.Type(classification)
+                    classifications.append(feature_schema.normalized)
+                else:
+                    raise ValueError(
+                        f"Classification `{classification}` not in list of supported classifications."
+                    )
+            else:
+                raise ValueError(
+                    "Neither `tool` nor `classification` found in the normalized feature schema"
+                )
+        normalized = {"tools": tools, "classifications": classifications}
+
+        # validation for ontology_kind and media_type is done within self.create_ontology
+        return self.create_ontology(
+            name=name,
+            normalized=normalized,
+            media_type=media_type,
+            ontology_kind=ontology_kind,
+        )
+
+    def delete_unused_feature_schema(self, feature_schema_id: str) -> None:
+        """
+        Deletes a feature schema if it is not used by any ontologies or annotations
+        Args:
+            feature_schema_id (str): The id of the feature schema to delete
+        Example:
+            >>> client.delete_unused_feature_schema("cleabc1my012ioqvu5anyaabc")
+        """
+
+        endpoint = (
+            self.rest_endpoint
+            + "/feature-schemas/"
+            + urllib.parse.quote(feature_schema_id)
+        )
+        response = self.connection.delete(endpoint)
+
+        if response.status_code != requests.codes.no_content:
+            raise lbox.exceptions.LabelboxError(
+                "Failed to delete the feature schema, message: "
+                + str(response.json()["message"])
+            )
+
+    def delete_unused_ontology(self, ontology_id: str) -> None:
+        """
+        Deletes an ontology if it is not used by any annotations
+        Args:
+            ontology_id (str): The id of the ontology to delete
+        Example:
+            >>> client.delete_unused_ontology("cleabc1my012ioqvu5anyaabc")
+        """
+        endpoint = (
+            self.rest_endpoint
+            + "/ontologies/"
+            + urllib.parse.quote(ontology_id)
+        )
+        response = self.connection.delete(endpoint)
+
+        if response.status_code != requests.codes.no_content:
+            raise lbox.exceptions.LabelboxError(
+                "Failed to delete the ontology, message: "
+                + str(response.json()["message"])
+            )
+
+    def update_feature_schema_title(
+        self, feature_schema_id: str, title: str
+    ) -> FeatureSchema:
+        """
+        Updates the title of a feature schema
+        Args:
+            feature_schema_id (str): The id of the feature schema to update
+            title (str): The new title of the feature schema
+        Returns:
+            The updated feature schema
+        Example:
+            >>> client.update_feature_schema_title("cleabc1my012ioqvu5anyaabc", "New Title")
+        """
+
+        endpoint = (
+            self.rest_endpoint
+            + "/feature-schemas/"
+            + urllib.parse.quote(feature_schema_id)
+            + "/definition"
+        )
+        response = self.connection.patch(endpoint, json={"title": title})
+
+        if response.status_code == requests.codes.ok:
+            return self.get_feature_schema(feature_schema_id)
+        else:
+            raise lbox.exceptions.LabelboxError(
+                "Failed to update the feature schema, message: "
+                + str(response.json()["message"])
+            )
+
+    def upsert_feature_schema(self, feature_schema: Dict) -> FeatureSchema:
+        """
+        Upserts a feature schema
+        Args:
+            feature_schema: Dict representing the feature schema to upsert
+        Returns:
+            The upserted feature schema
+        Example:
+            Insert a new feature schema
+            >>> tool = Tool(name="tool", tool=Tool.Type.BOUNDING_BOX, color="#FF0000")
+            >>> client.upsert_feature_schema(tool.asdict())
+            Update an existing feature schema
+            >>> tool =
Tool(feature_schema_id="cleabc1my012ioqvu5anyaabc", name="tool", tool=Tool.Type.BOUNDING_BOX, color="#FF0000") + >>> client.upsert_feature_schema(tool.asdict()) + """ + + feature_schema_id = ( + feature_schema.get("featureSchemaId") or "new_feature_schema_id" + ) + endpoint = ( + self.rest_endpoint + + "/feature-schemas/" + + urllib.parse.quote(feature_schema_id) + ) + response = self.connection.put( + endpoint, json={"normalized": json.dumps(feature_schema)} + ) + + if response.status_code == requests.codes.ok: + return self.get_feature_schema(response.json()["schemaId"]) + else: + raise lbox.exceptions.LabelboxError( + "Failed to upsert the feature schema, message: " + + str(response.json()["message"]) + ) + + def insert_feature_schema_into_ontology( + self, feature_schema_id: str, ontology_id: str, position: int + ) -> None: + """ + Inserts a feature schema into an ontology. If the feature schema is already in the ontology, + it will be moved to the new position. + Args: + feature_schema_id (str): The feature schema id to upsert + ontology_id (str): The id of the ontology to insert the feature schema into + position (int): The position number of the feature schema in the ontology + Example: + >>> client.insert_feature_schema_into_ontology("cleabc1my012ioqvu5anyaabc", "clefdvwl7abcgefgu3lyvcde", 2) + """ + + endpoint = ( + self.rest_endpoint + + "/ontologies/" + + urllib.parse.quote(ontology_id) + + "/feature-schemas/" + + urllib.parse.quote(feature_schema_id) + ) + response = self.connection.post(endpoint, json={"position": position}) + if response.status_code != requests.codes.created: + raise lbox.exceptions.LabelboxError( + "Failed to insert the feature schema into the ontology, message: " + + str(response.json()["message"]) + ) + + def get_unused_ontologies(self, after: str = None) -> List[str]: + """ + Returns a list of unused ontology ids + Args: + after (str): The cursor to use for pagination + Returns: + A list of unused ontology ids + Example: + To get the first page of unused ontology ids (100 at a time) + >>> client.get_unused_ontologies() + To get the next page of unused ontology ids + >>> client.get_unused_ontologies("cleabc1my012ioqvu5anyaabc") + """ + + endpoint = self.rest_endpoint + "/ontologies/unused" + response = self.connection.get(endpoint, json={"after": after}) + + if response.status_code == requests.codes.ok: + return response.json() + else: + raise lbox.exceptions.LabelboxError( + "Failed to get unused ontologies, message: " + + str(response.json()["message"]) + ) + + def get_unused_feature_schemas(self, after: str = None) -> List[str]: + """ + Returns a list of unused feature schema ids + Args: + after (str): The cursor to use for pagination + Returns: + A list of unused feature schema ids + Example: + To get the first page of unused feature schema ids (100 at a time) + >>> client.get_unused_feature_schemas() + To get the next page of unused feature schema ids + >>> client.get_unused_feature_schemas("cleabc1my012ioqvu5anyaabc") + """ + + endpoint = self.rest_endpoint + "/feature-schemas/unused" + response = self.connection.get(endpoint, json={"after": after}) + + if response.status_code == requests.codes.ok: + return response.json() + else: + raise lbox.exceptions.LabelboxError( + "Failed to get unused feature schemas, message: " + + str(response.json()["message"]) + ) + + def create_ontology( + self, + name, + normalized, + media_type: MediaType = None, + ontology_kind: OntologyKind = None, + ) -> Ontology: + """ + Creates an ontology from normalized data + 
>>> normalized = {"tools" : [{'tool': 'polygon', 'name': 'cat', 'color': 'black'}], "classifications" : []} + >>> ontology = client.create_ontology("ontology-name", normalized) + + Or use the ontology builder. It is especially useful for complex ontologies + >>> normalized = OntologyBuilder(tools=[Tool(tool=Tool.Type.BBOX, name="cat", color = 'black')]).asdict() + >>> ontology = client.create_ontology("ontology-name", normalized) + + To reuse existing feature schemas, use `create_ontology_from_feature_schemas()` + More details can be found here: + https://github.com/Labelbox/labelbox-python/blob/develop/examples/basics/ontologies.ipynb + + Args: + name (str): Name of the ontology + normalized (dict): A normalized ontology payload. See above for details. + media_type (MediaType or None): Media type of a new ontology + ontology_kind (OntologyKind or None): set to OntologyKind.ModelEvaluation if the ontology is for chat evaluation or + OntologyKind.ResponseCreation if ontology is for response creation, leave as None otherwise. + + Returns: + The created Ontology + + NOTE for chat evaluation, we currently force media_type to Conversational and for response creation, we force media_type to Text. + """ + + media_type_value = None + if media_type: + if MediaType.is_supported(media_type): + media_type_value = media_type.value + else: + raise get_media_type_validation_error(media_type) + + if ontology_kind and OntologyKind.is_supported(ontology_kind): + media_type = OntologyKind.evaluate_ontology_kind_with_media_type( + ontology_kind, media_type + ) + editor_task_type_value = EditorTaskTypeMapper.to_editor_task_type( + ontology_kind, media_type + ).value + elif ontology_kind: + raise OntologyKind.get_ontology_kind_validation_error(ontology_kind) + else: + editor_task_type_value = None + + query_str = """mutation upsertRootSchemaNodePyApi($data: UpsertOntologyInput!){ + upsertOntology(data: $data){ %s } + } """ % query.results_query_part(Entity.Ontology) + params = { + "data": { + "name": name, + "normalized": json.dumps(normalized), + "mediaType": media_type_value, + } + } + if editor_task_type_value: + params["data"]["editorTaskType"] = editor_task_type_value + + res = self.execute(query_str, params) + return Entity.Ontology(self, res["upsertOntology"]) + + def create_feature_schema(self, normalized): + """ + Creates a feature schema from normalized data. + >>> normalized = {'tool': 'polygon', 'name': 'cat', 'color': 'black'} + >>> feature_schema = client.create_feature_schema(normalized) + + Or use the Tool or Classification objects. It is especially useful for complex tools. + >>> normalized = Tool(tool=Tool.Type.BBOX, name="cat", color = 'black').asdict() + >>> feature_schema = client.create_feature_schema(normalized) + + Subclasses are also supported + >>> normalized = Tool( + tool=Tool.Type.SEGMENTATION, + name="cat", + classifications=[ + Classification( + class_type=Classification.Type.TEXT, + name="name" + ) + ] + ) + >>> feature_schema = client.create_feature_schema(normalized) + + More details can be found here: + https://github.com/Labelbox/labelbox-python/blob/develop/examples/basics/ontologies.ipynb + + Args: + normalized (dict): A normalized tool or classification payload. See above for details + Returns: + The created FeatureSchema. 
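+
+        A possible follow-up (illustrative sketch; assumes the returned schema's
+        `uid` and the `create_ontology_from_feature_schemas` call referenced in
+        create_ontology above):
+            >>> created = client.create_feature_schema(normalized)
+            >>> ontology = client.create_ontology_from_feature_schemas("ontology-name", [created.uid])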
+        """
+        query_str = """mutation upsertRootSchemaNodePyApi($data: UpsertRootSchemaNodeInput!){
+            upsertRootSchemaNode(data: $data){ %s }
+        } """ % query.results_query_part(Entity.FeatureSchema)
+        normalized = {k: v for k, v in normalized.items() if v}
+        params = {"data": {"normalized": json.dumps(normalized)}}
+        res = self.execute(query_str, params)["upsertRootSchemaNode"]
+        # Technically we are querying for a Schema Node.
+        # But the features are the same so we just grab the feature schema id
+        res["id"] = res["normalized"]["featureSchemaId"]
+        return Entity.FeatureSchema(self, res)
+
+    def get_model_run(self, model_run_id: str) -> ModelRun:
+        """Gets a single ModelRun with the given ID.
+
+        >>> model_run = client.get_model_run("<model_run_id>")
+
+        Args:
+            model_run_id (str): Unique ID of the ModelRun.
+        Returns:
+            A ModelRun object.
+        """
+        return self._get_single(Entity.ModelRun, model_run_id)
+
+    def assign_global_keys_to_data_rows(
+        self,
+        global_key_to_data_row_inputs: List[Dict[str, str]],
+        timeout_seconds=60,
+    ) -> Dict[str, Union[str, List[Any]]]:
+        """
+        Assigns global keys to data rows.
+
+        Args:
+            A list of dicts containing data_row_id and global_key.
+        Returns:
+            Dictionary containing 'status', 'results' and 'errors'.
+
+            'Status' contains the outcome of this job. It can be one of
+            'Success', 'Partial Success', or 'Failure'.
+
+            'Results' contains the successful global_key assignments, including
+            global_keys that have been sanitized to Labelbox standards.
+
+            'Errors' contains global_key assignments that failed, along with
+            the reasons for failure.
+        Examples:
+            >>> global_key_data_row_inputs = [
+                {"data_row_id": "cl7asgri20yvo075b4vtfedjb", "global_key": "key1"},
+                {"data_row_id": "cl7asgri10yvg075b4pz176ht", "global_key": "key2"},
+            ]
+            >>> job_result = client.assign_global_keys_to_data_rows(global_key_data_row_inputs)
+            >>> print(job_result['status'])
+            Partial Success
+            >>> print(job_result['results'])
+            [{'data_row_id': 'cl7tv9wry00hlka6gai588ozv', 'global_key': 'gk', 'sanitized': False}]
+            >>> print(job_result['errors'])
+            [{'data_row_id': 'cl7tpjzw30031ka6g4evqdfoy', 'global_key': 'gk"', 'error': 'Invalid global key'}]
+        """
+
+        def _format_successful_rows(
+            rows: List[Dict[str, str]], sanitized: bool
+        ) -> List[Dict[str, str]]:
+            return [
+                {
+                    "data_row_id": r["dataRowId"],
+                    "global_key": r["globalKey"],
+                    "sanitized": sanitized,
+                }
+                for r in rows
+            ]
+
+        def _format_failed_rows(
+            rows: List[Dict[str, str]], error_msg: str
+        ) -> List[Dict[str, str]]:
+            return [
+                {
+                    "data_row_id": r["dataRowId"],
+                    "global_key": r["globalKey"],
+                    "error": error_msg,
+                }
+                for r in rows
+            ]
+
+        # Validate input dict
+        validation_errors = []
+        for input in global_key_to_data_row_inputs:
+            if "data_row_id" not in input or "global_key" not in input:
+                validation_errors.append(input)
+        if len(validation_errors) > 0:
+            raise ValueError(
+                f"Must provide a list of dicts containing both `data_row_id` and `global_key`. The following dict(s) are invalid: {validation_errors}."
+            )
+
+        # Start assign global keys to data rows job
+        query_str = """mutation assignGlobalKeysToDataRowsPyApi($globalKeyDataRowLinks: [AssignGlobalKeyToDataRowInput!]!) 
{
+            assignGlobalKeysToDataRows(data: {assignInputs: $globalKeyDataRowLinks}) {
+                jobId
+            }
+        }
+        """
+        params = {
+            "globalKeyDataRowLinks": [
+                {utils.camel_case(key): value for key, value in input.items()}
+                for input in global_key_to_data_row_inputs
+            ]
+        }
+        assign_global_keys_to_data_rows_job = self.execute(query_str, params)
+
+        # Query string for retrieving job status and result, if job is done
+        result_query_str = """query assignGlobalKeysToDataRowsResultPyApi($jobId: ID!) {
+            assignGlobalKeysToDataRowsResult(jobId: {id: $jobId}) {
+                jobStatus
+                data {
+                    sanitizedAssignments {
+                        dataRowId
+                        globalKey
+                    }
+                    invalidGlobalKeyAssignments {
+                        dataRowId
+                        globalKey
+                    }
+                    unmodifiedAssignments {
+                        dataRowId
+                        globalKey
+                    }
+                    accessDeniedAssignments {
+                        dataRowId
+                        globalKey
+                    }
+                }}}
+        """
+        result_params = {
+            "jobId": assign_global_keys_to_data_rows_job[
+                "assignGlobalKeysToDataRows"
+            ]["jobId"]
+        }
+
+        # Poll job status until finished, then retrieve results
+        sleep_time = 2
+        start_time = time.time()
+        while True:
+            res = self.execute(result_query_str, result_params)
+            if (
+                res["assignGlobalKeysToDataRowsResult"]["jobStatus"]
+                == "COMPLETE"
+            ):
+                results, errors = [], []
+                res = res["assignGlobalKeysToDataRowsResult"]["data"]
+                # Successful assignments
+                results.extend(
+                    _format_successful_rows(
+                        rows=res["sanitizedAssignments"], sanitized=True
+                    )
+                )
+                results.extend(
+                    _format_successful_rows(
+                        rows=res["unmodifiedAssignments"], sanitized=False
+                    )
+                )
+                # Failed assignments
+                errors.extend(
+                    _format_failed_rows(
+                        rows=res["invalidGlobalKeyAssignments"],
+                        error_msg="Invalid assignment. Either DataRow does not exist, or globalKey is invalid",
+                    )
+                )
+                errors.extend(
+                    _format_failed_rows(
+                        rows=res["accessDeniedAssignments"],
+                        error_msg="Access denied to Data Row",
+                    )
+                )
+
+                if not errors:
+                    status = CollectionJobStatus.SUCCESS.value
+                elif errors and results:
+                    status = CollectionJobStatus.PARTIAL_SUCCESS.value
+                else:
+                    status = CollectionJobStatus.FAILURE.value
+
+                if errors:
+                    logger.warning(
+                        "There are errors present. Please look at 'errors' in the returned dict for more details"
+                    )
+
+                return {
+                    "status": status,
+                    "results": results,
+                    "errors": errors,
+                }
+            elif (
+                res["assignGlobalKeysToDataRowsResult"]["jobStatus"] == "FAILED"
+            ):
+                raise lbox.exceptions.LabelboxError(
+                    "Job assign_global_keys_to_data_rows failed."
+                )
+            current_time = time.time()
+            if current_time - start_time > timeout_seconds:
+                raise lbox.exceptions.TimeoutError(
+                    "Timed out waiting for assign_global_keys_to_data_rows job to complete."
+                )
+            time.sleep(sleep_time)
+
+    def get_data_row_ids_for_global_keys(
+        self, global_keys: List[str], timeout_seconds=60
+    ) -> Dict[str, Union[str, List[Any]]]:
+        """
+        Gets data row ids for a list of global keys.
+
+        Deprecation Notice: This function will soon no longer return 'Deleted Data Rows'
+        as part of the 'results'. Global keys for deleted data rows will soon be placed
+        under the 'Data Row not found' portion.
+
+        Args:
+            A list of global keys
+        Returns:
+            Dictionary containing 'status', 'results' and 'errors'.
+
+            'Status' contains the outcome of this job. It can be one of
+            'Success', 'Partial Success', or 'Failure'.
+
+            'Results' contains a list of the fetched corresponding data row ids in the input order.
+            For data rows that cannot be fetched due to an error, or data rows that do not exist,
+            an empty string is returned at the position of the respective global_key.
+            More error information can be found in the 'Errors' section. 
+
+            'Errors' contains a list of global_keys that could not be fetched, along
+            with the failure reason
+        Examples:
+            >>> job_result = client.get_data_row_ids_for_global_keys(["key1","key2"])
+            >>> print(job_result['status'])
+            Partial Success
+            >>> print(job_result['results'])
+            ['cl7tv9wry00hlka6gai588ozv', 'cl7tv9wxg00hpka6gf8sh81bj']
+            >>> print(job_result['errors'])
+            [{'global_key': 'asdf', 'error': 'Data Row not found'}]
+        """
+
+        def _format_failed_rows(
+            rows: List[str], error_msg: str
+        ) -> List[Dict[str, str]]:
+            return [{"global_key": r, "error": error_msg} for r in rows]
+
+        # Start get data rows for global keys job
+        query_str = """query getDataRowsForGlobalKeysPyApi($globalKeys: [ID!]!) {
+            dataRowsForGlobalKeys(where: {ids: $globalKeys}) { jobId}}
+        """
+        params = {"globalKeys": global_keys}
+        data_rows_for_global_keys_job = self.execute(query_str, params)
+
+        # Query string for retrieving job status and result, if job is done
+        result_query_str = """query getDataRowsForGlobalKeysResultPyApi($jobId: ID!) {
+            dataRowsForGlobalKeysResult(jobId: {id: $jobId}) { data {
+                fetchedDataRows { id }
+                notFoundGlobalKeys
+                accessDeniedGlobalKeys
+                } jobStatus}}
+        """
+        result_params = {
+            "jobId": data_rows_for_global_keys_job["dataRowsForGlobalKeys"][
+                "jobId"
+            ]
+        }
+
+        # Poll job status until finished, then retrieve results
+        sleep_time = 2
+        start_time = time.time()
+        while True:
+            res = self.execute(result_query_str, result_params)
+            if res["dataRowsForGlobalKeysResult"]["jobStatus"] == "COMPLETE":
+                data = res["dataRowsForGlobalKeysResult"]["data"]
+                results, errors = [], []
+                results.extend([row["id"] for row in data["fetchedDataRows"]])
+                errors.extend(
+                    _format_failed_rows(
+                        data["notFoundGlobalKeys"], "Data Row not found"
+                    )
+                )
+                errors.extend(
+                    _format_failed_rows(
+                        data["accessDeniedGlobalKeys"],
+                        "Access denied to Data Row",
+                    )
+                )
+
+                # Invalid results may contain empty string, so we must filter
+                # them prior to checking for PARTIAL_SUCCESS
+                filtered_results = list(filter(lambda r: r != "", results))
+                if not errors:
+                    status = CollectionJobStatus.SUCCESS.value
+                elif errors and len(filtered_results) > 0:
+                    status = CollectionJobStatus.PARTIAL_SUCCESS.value
+                else:
+                    status = CollectionJobStatus.FAILURE.value
+
+                if errors:
+                    logger.warning(
+                        "There are errors present. Please look at 'errors' in the returned dict for more details"
+                    )
+
+                return {"status": status, "results": results, "errors": errors}
+            elif res["dataRowsForGlobalKeysResult"]["jobStatus"] == "FAILED":
+                raise lbox.exceptions.LabelboxError(
+                    "Job dataRowsForGlobalKeys failed."
+                )
+            current_time = time.time()
+            if current_time - start_time > timeout_seconds:
+                raise lbox.exceptions.TimeoutError(
+                    "Timed out waiting for get_data_rows_for_global_keys job to complete."
+                )
+            time.sleep(sleep_time)
+
+    def clear_global_keys(
+        self, global_keys: List[str], timeout_seconds=60
+    ) -> Dict[str, Union[str, List[Any]]]:
+        """
+        Clears global keys for the data rows that correspond to the global keys provided.
+
+        Args:
+            A list of global keys
+        Returns:
+            Dictionary containing 'status', 'results' and 'errors'.
+
+            'Status' contains the outcome of this job. It can be one of
+            'Success', 'Partial Success', or 'Failure'.
+
+            'Results' contains a list of global keys that were successfully cleared.
+
+            'Errors' contains a list of global_keys corresponding to the data rows that could not be
+            modified, accessed by the user, or not found. 
+        Examples:
+            >>> job_result = client.clear_global_keys(["key1","key2","notfoundkey"])
+            >>> print(job_result['status'])
+            Partial Success
+            >>> print(job_result['results'])
+            ['key1', 'key2']
+            >>> print(job_result['errors'])
+            [{'global_key': 'notfoundkey', 'error': 'Failed to find data row matching provided global key'}]
+        """
+
+        def _format_failed_rows(
+            rows: List[str], error_msg: str
+        ) -> List[Dict[str, str]]:
+            return [{"global_key": r, "error": error_msg} for r in rows]
+
+        # Start the clear global keys job
+        query_str = """mutation clearGlobalKeysPyApi($globalKeys: [ID!]!) {
+            clearGlobalKeys(where: {ids: $globalKeys}) { jobId}}
+        """
+        params = {"globalKeys": global_keys}
+        clear_global_keys_job = self.execute(query_str, params)
+
+        # Query string for retrieving job status and result, if job is done
+        result_query_str = """query clearGlobalKeysResultPyApi($jobId: ID!) {
+            clearGlobalKeysResult(jobId: {id: $jobId}) { data {
+                clearedGlobalKeys
+                failedToClearGlobalKeys
+                notFoundGlobalKeys
+                accessDeniedGlobalKeys
+                } jobStatus}}
+        """
+        result_params = {
+            "jobId": clear_global_keys_job["clearGlobalKeys"]["jobId"]
+        }
+        # Poll job status until finished, then retrieve results
+        sleep_time = 2
+        start_time = time.time()
+        while True:
+            res = self.execute(result_query_str, result_params)
+            if res["clearGlobalKeysResult"]["jobStatus"] == "COMPLETE":
+                data = res["clearGlobalKeysResult"]["data"]
+                results, errors = [], []
+                results.extend(data["clearedGlobalKeys"])
+                errors.extend(
+                    _format_failed_rows(
+                        data["failedToClearGlobalKeys"],
+                        "Clearing global key failed",
+                    )
+                )
+                errors.extend(
+                    _format_failed_rows(
+                        data["notFoundGlobalKeys"],
+                        "Failed to find data row matching provided global key",
+                    )
+                )
+                errors.extend(
+                    _format_failed_rows(
+                        data["accessDeniedGlobalKeys"],
+                        "Denied access to modify data row matching provided global key",
+                    )
+                )
+
+                if not errors:
+                    status = CollectionJobStatus.SUCCESS.value
+                elif errors and len(results) > 0:
+                    status = CollectionJobStatus.PARTIAL_SUCCESS.value
+                else:
+                    status = CollectionJobStatus.FAILURE.value
+
+                if errors:
+                    logger.warning(
+                        "There are errors present. Please look at 'errors' in the returned dict for more details"
+                    )
+
+                return {"status": status, "results": results, "errors": errors}
+            elif res["clearGlobalKeysResult"]["jobStatus"] == "FAILED":
+                raise lbox.exceptions.LabelboxError(
+                    "Job clearGlobalKeys failed."
+                )
+            current_time = time.time()
+            if current_time - start_time > timeout_seconds:
+                raise lbox.exceptions.TimeoutError(
+                    "Timed out waiting for clear_global_keys job to complete."
+                )
+            time.sleep(sleep_time)
+
+    def get_catalog(self) -> Catalog:
+        """Returns a Catalog object for querying the data row catalog."""
+        return Catalog(client=self)
+
+    def get_catalog_slice(self, slice_id) -> CatalogSlice:
+        """
+        Fetches a Catalog Slice by ID.
+
+        Args:
+            slice_id (str): The ID of the Slice
+        Returns:
+            CatalogSlice
+        """
+        query_str = """query getSavedQueryPyApi($id: ID!) {
+            getSavedQuery(id: $id) {
+                id
+                name
+                description
+                filter
+                createdAt
+                updatedAt
+            }
+        }
+        """
+        res = self.execute(query_str, {"id": slice_id})
+        return Entity.CatalogSlice(self, res["getSavedQuery"])
+
+    def is_feature_schema_archived(
+        self, ontology_id: str, feature_schema_id: str
+    ) -> bool:
+        """
+        Returns true if a feature schema is archived in the specified ontology, returns false otherwise.
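+
+        Example (illustrative; both ids are placeholders):
+            >>> client.is_feature_schema_archived("<ontology_id>", "<feature_schema_id>")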
+
+        Args:
+            ontology_id (str): The ID of the ontology
+            feature_schema_id (str): The ID of the feature schema
+        Returns:
+            bool
+        """
+
+        ontology_endpoint = (
+            self.rest_endpoint
+            + "/ontologies/"
+            + urllib.parse.quote(ontology_id)
+        )
+        response = self.connection.get(ontology_endpoint)
+
+        if response.status_code == requests.codes.ok:
+            feature_schema_nodes = response.json()["featureSchemaNodes"]
+            tools = feature_schema_nodes["tools"]
+            classifications = feature_schema_nodes["classifications"]
+            relationships = feature_schema_nodes["relationships"]
+            feature_schema_node_list = tools + classifications + relationships
+            filtered_feature_schema_nodes = [
+                feature_schema_node
+                for feature_schema_node in feature_schema_node_list
+                if feature_schema_node["featureSchemaId"] == feature_schema_id
+            ]
+            if filtered_feature_schema_nodes:
+                return bool(filtered_feature_schema_nodes[0]["archived"])
+            else:
+                raise lbox.exceptions.LabelboxError(
+                    "The specified feature schema was not in the ontology."
+                )
+
+        elif response.status_code == 404:
+            raise lbox.exceptions.ResourceNotFoundError(Ontology, ontology_id)
+        else:
+            raise lbox.exceptions.LabelboxError(
+                "Failed to get the feature schema archived status."
+            )
+
+    def get_model_slice(self, slice_id) -> ModelSlice:
+        """
+        Fetches a Model Slice by ID.
+
+        Args:
+            slice_id (str): The ID of the Slice
+        Returns:
+            ModelSlice
+        """
+        query_str = """
+            query getSavedQueryPyApi($id: ID!) {
+                getSavedQuery(id: $id) {
+                    id
+                    name
+                    description
+                    filter
+                    createdAt
+                    updatedAt
+                }
+            }
+        """
+        res = self.execute(query_str, {"id": slice_id})
+        if res is None or res["getSavedQuery"] is None:
+            raise lbox.exceptions.ResourceNotFoundError(ModelSlice, slice_id)
+
+        return Entity.ModelSlice(self, res["getSavedQuery"])
+
+    def delete_feature_schema_from_ontology(
+        self, ontology_id: str, feature_schema_id: str
+    ) -> DeleteFeatureFromOntologyResult:
+        """
+        Deletes or archives a feature schema from an ontology.
+        If the feature schema is a root level node with associated labels, it will be archived.
+        If the feature schema is a nested node in the ontology and does not have associated labels, it will be deleted.
+        If the feature schema is a nested node in the ontology and has associated labels, it will not be deleted.
+
+        Args:
+            ontology_id (str): The ID of the ontology.
+            feature_schema_id (str): The ID of the feature schema.
+
+        Returns:
+            DeleteFeatureFromOntologyResult: The result of the feature schema removal.
+
+        Example:
+            >>> client.delete_feature_schema_from_ontology(<ontology_id>, <feature_schema_id>)
+        """
+        ontology_endpoint = (
+            self.rest_endpoint
+            + "/ontologies/"
+            + urllib.parse.quote(ontology_id)
+            + "/feature-schemas/"
+            + urllib.parse.quote(feature_schema_id)
+        )
+        response = self.connection.delete(ontology_endpoint)
+
+        if response.status_code == requests.codes.ok:
+            response_json = response.json()
+            if response_json["archived"] is True:
+                logger.info(
+                    "Feature schema was archived from the ontology because it had associated labels."
+                )
+            elif response_json["deleted"] is True:
+                logger.info(
+                    "Feature schema was successfully removed from the ontology"
+                )
+            result = DeleteFeatureFromOntologyResult()
+            result.archived = bool(response_json["archived"])
+            result.deleted = bool(response_json["deleted"])
+            return result
+        else:
+            raise lbox.exceptions.LabelboxError(
+                "Failed to remove feature schema from ontology, message: "
+                + str(response.json()["message"])
+            )
+
+    def unarchive_feature_schema_node(
+        self, ontology_id: str, root_feature_schema_id: str
+    ) -> None:
+        """
+        Unarchives a feature schema node in an ontology.
+        Only root level feature schema nodes can be unarchived.
+        Args:
+            ontology_id (str): The ID of the ontology
+            root_feature_schema_id (str): The ID of the root level feature schema
+        Returns:
+            None
+        """
+        ontology_endpoint = (
+            self.rest_endpoint
+            + "/ontologies/"
+            + urllib.parse.quote(ontology_id)
+            + "/feature-schemas/"
+            + urllib.parse.quote(root_feature_schema_id)
+            + "/unarchive"
+        )
+        response = self.connection.patch(ontology_endpoint)
+        if response.status_code == requests.codes.ok:
+            if not bool(response.json()["unarchived"]):
+                raise lbox.exceptions.LabelboxError(
+                    "Failed to unarchive the feature schema."
+                )
+        else:
+            raise lbox.exceptions.LabelboxError(
+                "Failed to unarchive the feature schema node, message: ",
+                response.text,
+            )
+
+    def get_batch(self, project_id: str, batch_id: str) -> Entity.Batch:
+        """Fetches a single batch in the given project by batch ID."""
+        # obtain batch entity to return
+        get_batch_str = """query %s($projectId: ID!, $batchId: ID!) {
+                  project(where: {id: $projectId}) {
+                     batches(where: {id: $batchId}) {
+                        nodes {
+                           %s
+                        }
+                     }
+                }
+            }
+        """ % (
+            "getProjectBatchPyApi",
+            query.results_query_part(Entity.Batch),
+        )
+
+        batch = self.execute(
+            get_batch_str,
+            {"projectId": project_id, "batchId": batch_id},
+            timeout=180.0,
+            experimental=True,
+        )["project"]["batches"]["nodes"][0]
+
+        return Entity.Batch(self, project_id, batch)
+
+    def send_to_annotate_from_catalog(
+        self,
+        destination_project_id: str,
+        task_queue_id: Optional[str],
+        batch_name: str,
+        data_rows: Union[DataRowIds, GlobalKeys],
+        params: Dict[str, Any],
+    ):
+        """
+        Sends data rows from catalog to a specified project for annotation.
+
+        Example usage:
+            >>> task = client.send_to_annotate_from_catalog(
+            >>>     destination_project_id=DESTINATION_PROJECT_ID,
+            >>>     task_queue_id=TASK_QUEUE_ID,
+            >>>     batch_name="batch_name",
+            >>>     data_rows=UniqueIds([DATA_ROW_ID]),
+            >>>     params={
+            >>>         "source_project_id":
+            >>>             SOURCE_PROJECT_ID,
+            >>>         "override_existing_annotations_rule":
+            >>>             ConflictResolutionStrategy.OverrideWithAnnotations
+            >>>     })
+            >>> task.wait_till_done()
+
+        Args:
+            destination_project_id: The ID of the project to send the data rows to.
+            task_queue_id: The ID of the task queue to send the data rows to. If not specified, the data rows will be
+                sent to the Done workflow state.
+            batch_name: The name of the batch to create. If more than one batch is created, additional batches will be
+                named with a monotonically increasing numerical suffix, starting at "_1".
+            data_rows: The data rows to send to the project.
+            params: Additional parameters to configure the job. See SendToAnnotateFromCatalogParams for more details.
+
+        Returns: The created task for this operation.
+
+        """
+
+        validated_params = SendToAnnotateFromCatalogParams(**params)
+
+        mutation_str = """mutation SendToAnnotateFromCatalogPyApi($input: SendToAnnotateFromCatalogInput!) 
{
+            sendToAnnotateFromCatalog(input: $input) {
+                taskId
+            }
+        }
+        """
+
+        destination_task_queue = build_destination_task_queue_input(
+            task_queue_id
+        )
+        data_rows_query = self.build_catalog_query(data_rows)
+
+        predictions_input = (
+            build_predictions_input(
+                validated_params.predictions_ontology_mapping,
+                validated_params.source_model_run_id,
+            )
+            if validated_params.source_model_run_id
+            else None
+        )
+
+        annotations_input = (
+            build_annotations_input(
+                validated_params.annotations_ontology_mapping,
+                validated_params.source_project_id,
+            )
+            if validated_params.source_project_id
+            else None
+        )
+
+        res = self.execute(
+            mutation_str,
+            {
+                "input": {
+                    "destinationProjectId": destination_project_id,
+                    "batchInput": {
+                        "batchName": batch_name,
+                        "batchPriority": validated_params.batch_priority,
+                    },
+                    "destinationTaskQueue": destination_task_queue,
+                    "excludeDataRowsInProject": validated_params.exclude_data_rows_in_project,
+                    "annotationsInput": annotations_input,
+                    "predictionsInput": predictions_input,
+                    "conflictLabelsResolutionStrategy": validated_params.override_existing_annotations_rule,
+                    "searchQuery": {"scope": None, "query": [data_rows_query]},
+                    "ordering": {
+                        "type": "RANDOM",
+                        "random": {"seed": random.randint(0, 10000)},
+                        "sorting": None,
+                    },
+                    "sorting": None,
+                    "limit": None,
+                }
+            },
+        )["sendToAnnotateFromCatalog"]
+
+        return Entity.Task.get_task(self, res["taskId"])
+
+    @staticmethod
+    def build_catalog_query(data_rows: Union[DataRowIds, GlobalKeys]):
+        """
+        Given a list of data rows, builds a query that can be used to fetch the associated data rows from the catalog.
+
+        Args:
+            data_rows: A list of data rows. Can be either UniqueIds or GlobalKeys.
+
+        Returns: A query that can be used to fetch the associated data rows from the catalog.
+
+        """
+        if isinstance(data_rows, DataRowIds):
+            data_rows_query = {
+                "type": "data_row_id",
+                "operator": "is",
+                "ids": list(data_rows),
+            }
+        elif isinstance(data_rows, GlobalKeys):
+            data_rows_query = {
+                "type": "global_key",
+                "operator": "is",
+                "ids": list(data_rows),
+            }
+        else:
+            raise ValueError(
+                f"Invalid data_rows type {type(data_rows)}. Type of data_rows must be DataRowIds or GlobalKeys"
+            )
+        return data_rows_query
+
+    def run_foundry_app(
+        self,
+        model_run_name: str,
+        data_rows: Union[DataRowIds, GlobalKeys],
+        app_id: str,
+    ) -> Task:
+        """
+        Run a foundry app.
+
+        Args:
+            model_run_name (str): Name of a new model run to store app predictions in
+            data_rows (DataRowIds or GlobalKeys): Data row identifiers to run predictions on
+            app_id (str): Foundry app to run predictions with
+        """
+        foundry_client = FoundryClient(self)
+        return foundry_client.run_app(model_run_name, data_rows, app_id)
+
+    def create_embedding(self, name: str, dims: int) -> Embedding:
+        """
+        Create a new embedding. You must provide a name and the
+        number of dimensions the embedding has. Once an
+        embedding has been created, you can upload the vector
+        data associated with the embedding id.
+
+        Args:
+            name: The name of the embedding.
+            dims: The number of dimensions.
+
+        Returns:
+            A new Embedding object.
+        """
+        data = self._adv_client.create_embedding(name, dims)
+        return Embedding(self._adv_client, **data)
+
+    def get_embeddings(self) -> List[Embedding]:
+        """
+        Return a list of all embeddings for the current organization.
+
+        Returns:
+            A list of embedding objects. 
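+
+        Example (illustrative sketch; assumes an authenticated `client`):
+            >>> embeddings = client.get_embeddings()
+            >>> names = [e.name for e in embeddings]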
+        """
+        results = self._adv_client.get_embeddings()
+        return [Embedding(self._adv_client, **data) for data in results]
+
+    def get_embedding_by_id(self, id: str) -> Embedding:
+        """
+        Return the embedding for the provided embedding id.
+
+        Args:
+            id: The embedding ID.
+
+        Returns:
+            The embedding object.
+        """
+        data = self._adv_client.get_embedding(id)
+        return Embedding(self._adv_client, **data)
+
+    def get_embedding_by_name(self, name: str) -> Embedding:
+        """
+        Return the embedding for the provided embedding name.
+
+        Args:
+            name: The embedding name
+
+        Returns:
+            The embedding object.
+        """
+        # NB: It's safe to do the filtering client-side as we only allow 10 embeddings per org.
+        embeddings = self.get_embeddings()
+        for e in embeddings:
+            if e.name == name:
+                return e
+        raise lbox.exceptions.ResourceNotFoundError(Embedding, dict(name=name))
+
+    def upsert_label_feedback(
+        self, label_id: str, feedback: str, scores: Dict[str, float]
+    ) -> List[LabelScore]:
+        """
+        Submits label feedback, which consists of a free-form text comment and
+        numeric label scores.
+
+        Args:
+            label_id: Target label ID
+            feedback: Free text comment regarding the label
+            scores: A dict of scores, the key is a score name and the value is
+                the score value
+
+        Returns:
+            A list of LabelScore instances
+        """
+        mutation_str = """
+        mutation UpsertAutoQaLabelFeedbackPyApi(
+            $labelId: ID!
+            $feedback: String!
+            $scores: Json!
+        ) {
+            upsertAutoQaLabelFeedback(
+                input: {
+                    labelId: $labelId,
+                    feedback: $feedback,
+                    scores: $scores
+                }
+            ) {
+                id
+                scores {
+                    id
+                    name
+                    score
+                }
+            }
+        }
+        """
+        res = self.execute(
+            mutation_str,
+            {"labelId": label_id, "feedback": feedback, "scores": scores},
+        )
+        scores_raw = res["upsertAutoQaLabelFeedback"]["scores"]
+
+        return [
+            LabelScore(name=x["name"], score=x["score"]) for x in scores_raw
+        ]
+
+    def get_labeling_service_dashboards(
+        self,
+        search_query: Optional[List[SearchFilter]] = None,
+    ) -> PaginatedCollection:
+        """
+        Get all labeling service dashboards for a given org.
+
+        Optional parameters:
+            search_query: A list of search filters representing the search
+
+        NOTE:
+            - Retrieves all projects for the organization or as filtered by the search query
+            - INCLUDING those not requesting labeling services
+            - Sorted by project created date in ascending order.
+
+        Examples:
+            Retrieves all labeling service dashboards for a given workspace id:
+            >>> workspace_filter = WorkspaceFilter(
+            >>>     operation=OperationType.Workspace,
+            >>>     operator=IdOperator.Is,
+            >>>     values=[workspace_id])
+            >>> labeling_service_dashboard = [
+            >>>     ld for ld in project.client.get_labeling_service_dashboards(search_query=[workspace_filter])]
+
+            Retrieves all labeling service dashboards requested less than 7 days ago:
+            >>> seven_days_ago = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")
+            >>> workforce_requested_filter_before = WorkforceRequestedDateFilter(
+            >>>     operation=OperationType.WorforceRequestedDate,
+            >>>     value=DateValue(operator=RangeDateTimeOperatorWithSingleValue.GreaterThanOrEqual,
+            >>>         value=seven_days_ago))
+            >>> labeling_service_dashboard = [ld for ld in project.client.get_labeling_service_dashboards(search_query=[workforce_requested_filter_before])]
+
+            See libs/labelbox/src/labelbox/schema/search_filters.py and libs/labelbox/tests/unit/test_unit_search_filters.py for more examples.
+        """
+        return LabelingServiceDashboard.get_all(self, search_query=search_query)
+
+    def get_task_by_id(self, task_id: str) -> Union[Task, DataUpsertTask]:
+        """
+        Fetches a task by ID.
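+
+        Example (illustrative; the task id is a placeholder):
+            >>> task = client.get_task_by_id("<task_id>")
+            >>> print(task.status)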
+ + Args: + task_id (str): The ID of the task. + + Returns: + Task or DataUpsertTask + + Throws: + ResourceNotFoundError: If the task does not exist. + + NOTE: Export task is not supported yet + """ + user = self.get_user() + query = """ + query GetUserCreatedTasksPyApi($userId: ID!, $taskId: ID!) { + user(where: {id: $userId}) { + createdTasks(where: {id: $taskId} skip: 0 first: 1) { + completionPercentage + createdAt + errors + metadata + name + result + status + type + id + updatedAt + } + } + } + """ + result = self.execute(query, {"userId": user.uid, "taskId": task_id}) + data = result.get("user", {}).get("createdTasks", []) + if not data: + raise lbox.exceptions.ResourceNotFoundError( + message=f"The task {task_id} does not exist." + ) + task_data = data[0] + if task_data["type"].lower() == "adv-upsert-data-rows": + task = DataUpsertTask(self, task_data) + else: + task = Task(self, task_data) + + task._user = user + return task From 321cdafcab6cfc414a198ce96ad01d1db93c663f Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:23:10 -0500 Subject: [PATCH 39/44] Remove test_labeling_service changes --- libs/labelbox/tests/integration/test_labeling_service.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py index 91fd2c5fb..bba8cef78 100644 --- a/libs/labelbox/tests/integration/test_labeling_service.py +++ b/libs/labelbox/tests/integration/test_labeling_service.py @@ -1,10 +1,6 @@ import pytest from lbox.exceptions import LabelboxError, ResourceNotFoundError -from labelbox.exceptions import ( - MalformedQueryException, - ResourceNotFoundError, -) from labelbox.schema.labeling_service import LabelingServiceStatus @@ -55,7 +51,7 @@ def test_request_labeling_service_moe_project( labeling_service = project.get_labeling_service() with pytest.raises( - MalformedQueryException, + LabelboxError, match='[{"errorType":"PROJECT_MODEL_CONFIG","errorMessage":"Project model config is not completed"}]', ): labeling_service.request() @@ -77,5 +73,5 @@ def test_request_labeling_service_incomplete_requirements(ontology, project): ): # No labeling service by default labeling_service.request() project.connect_ontology(ontology) - with pytest.raises(MalformedQueryException): + with pytest.raises(LabelboxError): labeling_service.request() From 673f3670c8bd70e4ccc730b17a839decd5456b32 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:24:38 -0500 Subject: [PATCH 40/44] Flip string remover to before validation --- libs/labelbox/src/labelbox/schema/data_row_metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py index f67b3d269..4f11170fd 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py +++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py @@ -27,7 +27,7 @@ conlist, ConfigDict, model_serializer, - AfterValidator, + BeforeValidator, ) from labelbox.schema.ontology import SchemaId @@ -39,7 +39,7 @@ Name = Annotated[ str, - AfterValidator(lambda x: str.strip(str(x))), + BeforeValidator(lambda x: str.strip(str(x))), Field(min_length=1, max_length=100), ] From d3ee85a401b8f382b2d00bdb5e7cb76eacf771ec Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 
2024 18:37:08 -0500 Subject: [PATCH 41/44] removed deprecated timezone from tests --- libs/labelbox/tests/integration/test_data_rows.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index 78a1efb3a..7d777a28a 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -1,7 +1,7 @@ import json import os import uuid -from datetime import datetime +from datetime import datetime, timezone from tempfile import NamedTemporaryFile from unittest.mock import patch @@ -95,7 +95,7 @@ def tile_content(): def make_metadata_fields(): msg = "A message" - time = datetime.utcnow() + time = datetime.now(timezone.utc) fields = [ DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID, value=TEST_SPLIT_ID), @@ -107,7 +107,7 @@ def make_metadata_fields(): def make_metadata_fields_dict(): msg = "A message" - time = datetime.utcnow() + time = datetime.now(timezone.utc) fields = [ {"schema_id": SPLIT_SCHEMA_ID, "value": TEST_SPLIT_ID}, From dd531eea2d39c7b3e7037c585dddd00350784ce1 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 23 Sep 2024 22:38:12 -0500 Subject: [PATCH 42/44] Fixed labeling service --- .../schema/labeling_service_dashboard.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py index e2c6fa26b..6f5a6096e 100644 --- a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py +++ b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py @@ -1,9 +1,9 @@ from datetime import datetime from string import Template -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING from lbox.exceptions import ResourceNotFoundError -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, model_validator, model_serializer from labelbox.pagination import PaginatedCollection from labelbox.schema.labeling_service_status import LabelingServiceStatus @@ -13,6 +13,9 @@ from .ontology_kind import EditorTaskType +if TYPE_CHECKING: + from labelbox import Client + GRAPHQL_QUERY_SELECTIONS = """ id name @@ -50,7 +53,7 @@ class LabelingServiceDashboard(_CamelCaseMixin): Represent labeling service data for a project NOTE on tasks vs data rows. A task is a unit of work that is assigned to a user. A data row is a unit of data that needs to be labeled. - In the current implementation a task reprsents a single data row. However tasks only exists when a labeler start labeling a data row. + In the current implementation a task represents a single data row. However tasks only exists when a labeler start labeling a data row. So if a data row is not labeled, it will not have a task associated with it. Therefore the number of tasks can be less than the number of data rows. 
 Attributes:
@@ -79,7 +82,7 @@ class LabelingServiceDashboard(_CamelCaseMixin):
     editor_task_type: EditorTaskType = Field(frozen=True, default=None)
     tags: List[LabelingServiceDashboardTags] = Field(frozen=True, default=None)
 
-    client: Any  # type Any to avoid circular import from client
+    client: "Client"
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -221,8 +224,9 @@ def convert_boost_data(cls, data):
 
         return data
 
-    def dict(self, *args, **kwargs):
-        row = super().dict(*args, **kwargs)
+    @model_serializer(mode="wrap")
+    def ser_model(self, handler):
+        row = handler(self)
         row.pop("client")
         row["service_type"] = self.service_type
         return row

From fb60c8840468fe2af8ffca347c4c5f8d60caeeda Mon Sep 17 00:00:00 2001
From: Gabefire <33893811+Gabefire@users.noreply.github.com>
Date: Tue, 24 Sep 2024 08:56:43 -0500
Subject: [PATCH 43/44] Swapped to Any

---
 .../src/labelbox/schema/labeling_service_dashboard.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py
index 6f5a6096e..b9015bcd0 100644
--- a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py
+++ b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py
@@ -1,9 +1,9 @@
 from datetime import datetime
 from string import Template
-from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
+from typing import Any, Dict, List, Optional, Union
 
 from lbox.exceptions import ResourceNotFoundError
 from pydantic import BaseModel, Field, model_validator, model_serializer
@@ -13,9 +13,6 @@
 
 from .ontology_kind import EditorTaskType
 
-if TYPE_CHECKING:
-    from labelbox import Client
-
 GRAPHQL_QUERY_SELECTIONS = """
@@ -82,7 +79,7 @@ class LabelingServiceDashboard(_CamelCaseMixin):
     editor_task_type: EditorTaskType = Field(frozen=True, default=None)
     tags: List[LabelingServiceDashboardTags] = Field(frozen=True, default=None)
 
-    client: "Client"
+    client: Any
 
     def __init__(self, **kwargs):
        super().__init__(**kwargs)
@@ -224,9 +221,9 @@ def convert_boost_data(cls, data):
 
         return data
 
-    @model_serializer(mode="wrap")
-    def ser_model(self, handler):
-        row = handler(self)
+    @model_serializer()
+    def ser_model(self):
+        row = dict(self)
         row.pop("client")
         row["service_type"] = self.service_type
         return row

From 87821175f9b41b8d6e091b154b0b8c42bffa767d Mon Sep 17 00:00:00 2001
From: Gabefire <33893811+Gabefire@users.noreply.github.com>
Date: Tue, 24 Sep 2024 09:14:17 -0500
Subject: [PATCH 44/44] Revert some changes

---
 .../src/labelbox/schema/labeling_service_dashboard.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py
index b9015bcd0..2f91af7af 100644
--- a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py
+++ b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py
@@ -79,7 +79,7 @@ class LabelingServiceDashboard(_CamelCaseMixin):
     editor_task_type: EditorTaskType = Field(frozen=True, default=None)
     tags: List[LabelingServiceDashboardTags] = Field(frozen=True, default=None)
 
-    client: Any
+    client: Any  # type Any to avoid circular import from client
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
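
A minimal, self-contained sketch of the plain-mode pydantic v2 serializer pattern
that patches 42-44 converge on (illustrative only; `DashboardRow` and its fields
are hypothetical stand-ins for LabelingServiceDashboard). `dict(self)` relies on
pydantic v2's BaseModel yielding (field name, value) pairs when iterated, which
lets the runtime-only `client` handle be dropped without mutating the model:

    from typing import Any

    from pydantic import BaseModel, model_serializer

    class DashboardRow(BaseModel):
        name: str
        client: Any  # runtime-only handle; should not leak into dumps

        @model_serializer()
        def ser_model(self):
            # copy field values into a plain dict, then drop the client
            row = dict(self)
            row.pop("client")
            return row

    row = DashboardRow(name="my project", client=object())
    assert row.model_dump() == {"name": "my project"}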