From 0bbd7c290ac4aa55bc5a373b3b63fa77c68dc39f Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 16 Sep 2024 18:05:39 -0700 Subject: [PATCH 01/22] Vb/fix ontology leaks plt 1379 (#1814) --- .../labelbox/schema/bulk_import_request.py | 8 +- .../schema/labeling_service_dashboard.py | 38 ++-- libs/labelbox/tests/conftest.py | 191 +++++++++++++----- .../tests/data/annotation_import/conftest.py | 20 +- .../data/annotation_import/test_model_run.py | 20 +- libs/labelbox/tests/data/export/conftest.py | 11 +- .../tests/data/test_data_row_metadata.py | 15 -- libs/labelbox/tests/integration/conftest.py | 4 +- .../tests/integration/test_feature_schema.py | 18 +- .../unit/test_labeling_service_dashboard.py | 102 +++++----- 10 files changed, 260 insertions(+), 167 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py index 44ac7cd6a..8e11f3261 100644 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ b/libs/labelbox/src/labelbox/schema/bulk_import_request.py @@ -787,9 +787,7 @@ def validate_feature_schemas( # A union with custom construction logic to improve error messages class NDClassification( SpecialUnion, - Type[ # type: ignore - Union[NDText, NDRadio, NDChecklist] - ], + Type[Union[NDText, NDRadio, NDChecklist]], # type: ignore ): ... @@ -979,9 +977,7 @@ class NDTool( class NDAnnotation( SpecialUnion, - Type[ # type: ignore - Union[NDTool, NDClassification] - ], + Type[Union[NDTool, NDClassification]], # type: ignore ): @classmethod def build(cls: Any, data) -> "NDBase": diff --git a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py index 2052897f6..c5e1fa11e 100644 --- a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py +++ b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py @@ -84,7 +84,8 @@ def __init__(self, **kwargs): super().__init__(**kwargs) if not self.client.enable_experimental: raise RuntimeError( - "Please enable experimental in client to use LabelingService") + "Please enable experimental in client to use LabelingService" + ) @property def service_type(self): @@ -97,20 +98,28 @@ def service_type(self): if self.editor_task_type is None: return sentence_case(self.media_type.value) - if (self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation - and self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Offline chat evaluation" - if (self.editor_task_type == EditorTaskType.ModelChatEvaluation and - self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.ModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Live chat evaluation" - if (self.editor_task_type == EditorTaskType.ResponseCreation and - self.media_type == MediaType.Text): + if ( + self.editor_task_type == EditorTaskType.ResponseCreation + and self.media_type == MediaType.Text + ): return "Response creation" - if (self.media_type == MediaType.LLMPromptCreation or - self.media_type == MediaType.LLMPromptResponseCreation): + if ( + self.media_type == MediaType.LLMPromptCreation + or self.media_type == MediaType.LLMPromptResponseCreation + ): return "Prompt response creation" return sentence_case(self.media_type.value) @@ -154,7 +163,8 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) else: template = 
Template( """query SearchProjectsPyApi($$first: Int, $$from: String) { @@ -164,11 +174,13 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) query_str = template.substitute( labeling_dashboard_selections=GRAPHQL_QUERY_SELECTIONS, search_query=build_search_filter(search_query) - if search_query else None, + if search_query + else None, ) params: Dict[str, Union[str, int]] = {} @@ -186,7 +198,7 @@ def convert_to_labeling_service_dashboard(client, data): experimental=True, ) - @model_validator(mode='before') + @model_validator(mode="before") def convert_boost_data(cls, data): if "boostStatus" in data: data["status"] = LabelingServiceStatus(data.pop("boostStatus")) diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py index 446db396b..6d13a8d83 100644 --- a/libs/labelbox/tests/conftest.py +++ b/libs/labelbox/tests/conftest.py @@ -7,7 +7,9 @@ import re import uuid import time +from labelbox.schema.project import Project import requests +from labelbox.schema.ontology import Ontology import pytest from types import SimpleNamespace from typing import Type @@ -23,21 +25,11 @@ from labelbox.schema.queue_mode import QueueMode from labelbox import Client -from labelbox import Dataset, DataRow from labelbox import LabelingFrontend -from labelbox import OntologyBuilder, Tool, Option, Classification, MediaType -from labelbox.orm import query -from labelbox.pagination import PaginatedCollection +from labelbox import OntologyBuilder, Tool, Option, Classification from labelbox.schema.annotation_import import LabelImport -from labelbox.schema.catalog import Catalog from labelbox.schema.enums import AnnotationImportState -from labelbox.schema.invite import Invite -from labelbox.schema.quality_mode import QualityMode -from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.user import User from labelbox.exceptions import LabelboxError -from contextlib import suppress -from labelbox import Client IMG_URL = "https://picsum.photos/200/300.jpg" MASKABLE_IMG_URL = "https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg" @@ -638,17 +630,22 @@ def organization(client): def configured_project_with_label( client, rand_gen, - image_url, - project, dataset, data_row, wait_for_label_processing, + teardown_helpers, ): """Project with a connected dataset, having one datarow + Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra labels One label is already created and yielded when using fixture """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) project._wait_until_data_rows_are_processed( data_row_ids=[data_row.uid], wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS, @@ -666,8 +663,7 @@ def configured_project_with_label( ) yield [project, dataset, data_row, label] - for label in project.labels(): - label.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) def _create_label(project, data_row, ontology, wait_for_label_processing): @@ -736,13 +732,23 @@ def big_dataset(dataset: Dataset): @pytest.fixture def configured_batch_project_with_label( - project, dataset, data_row, wait_for_label_processing + client, + dataset, + data_row, + wait_for_label_processing, + rand_gen, + teardown_helpers, ): """Project with a batch having one datarow Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra 
labels One label is already created and yielded when using fixture """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) data_rows = [dr.uid for dr in list(dataset.data_rows())] project._wait_until_data_rows_are_processed( data_row_ids=data_rows, sleep_interval=3 @@ -757,18 +763,27 @@ def configured_batch_project_with_label( yield [project, dataset, data_row, label] - for label in project.labels(): - label.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture def configured_batch_project_with_multiple_datarows( - project, dataset, data_rows, wait_for_label_processing + client, + dataset, + data_rows, + wait_for_label_processing, + rand_gen, + teardown_helpers, ): """Project with a batch having multiple datarows Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra labels """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) global_keys = [dr.global_key for dr in data_rows] batch_name = f"batch {uuid.uuid4()}" @@ -780,26 +795,7 @@ def configured_batch_project_with_multiple_datarows( yield [project, dataset, data_rows] - for label in project.labels(): - label.delete() - - -@pytest.fixture -def configured_batch_project_for_labeling_service( - project, data_row_and_global_key -): - """Project with a batch having multiple datarows - Project contains an ontology with 1 bbox tool - Additionally includes a create_label method for any needed extra labels - """ - global_keys = [data_row_and_global_key[1]] - - batch_name = f"batch {uuid.uuid4()}" - project.create_batch(batch_name, global_keys=global_keys) - - _setup_ontology(project) - - yield project + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) # NOTE this is nice heuristics, also there is this logic _wait_until_data_rows_are_processed in Project @@ -1062,7 +1058,7 @@ def project_with_empty_ontology(project): @pytest.fixture def configured_project_with_complex_ontology( - client, initial_dataset, rand_gen, image_url + client, initial_dataset, rand_gen, image_url, teardown_helpers ): project = client.create_project( name=rand_gen(str), @@ -1127,7 +1123,7 @@ def configured_project_with_complex_ontology( project.setup(editor, ontology.asdict()) yield [project, data_row] - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture @@ -1147,12 +1143,13 @@ def valid_model_id(): @pytest.fixture def requested_labeling_service( - rand_gen, - live_chat_evaluation_project_with_new_dataset, - chat_evaluation_ontology, - model_config, + rand_gen, client, chat_evaluation_ontology, model_config, teardown_helpers ): - project = live_chat_evaluation_project_with_new_dataset + project_name = f"test-model-evaluation-project-{rand_gen(str)}" + dataset_name = f"test-model-evaluation-dataset-{rand_gen(str)}" + project = client.create_model_evaluation_project( + name=project_name, dataset_name=dataset_name, data_row_count=1 + ) project.connect_ontology(chat_evaluation_ontology) project.upsert_instructions("tests/integration/media/sample_pdf.pdf") @@ -1164,3 +1161,105 @@ def requested_labeling_service( labeling_service.request() yield project, project.get_labeling_service() + + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) + + +class TearDownHelpers: + @staticmethod + def 
teardown_project_labels_ontology_feature_schemas(project: Project):
+        """
+        Call this function to release project, labels, ontology and feature schemas in fixture teardown
+
+        NOTE: exception handling is not required as this is a fixture teardown
+        """
+        ontology = project.ontology()
+        ontology_id = ontology.uid
+        client = project.client
+        classification_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+        ]
+        tool_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["tools"]
+        ]
+
+        feature_schema_ids = (
+            classification_feature_schema_ids + tool_feature_schema_ids
+        )
+        labels = list(project.labels())
+        for label in labels:
+            label.delete()
+
+        project.delete()
+        client.delete_unused_ontology(ontology_id)
+        for feature_schema_id in feature_schema_ids:
+            try:
+                client.delete_unused_feature_schema(feature_schema_id)
+            except LabelboxError as e:
+                print(
+                    f"Failed to delete feature schema {feature_schema_id}: {e}"
+                )
+
+    @staticmethod
+    def teardown_ontology_feature_schemas(ontology: Ontology):
+        """
+        Call this function to release an ontology and its feature schemas in fixture teardown
+
+        NOTE: exception handling is not required as this is a fixture teardown
+        """
+        ontology_id = ontology.uid
+        client = ontology.client
+        classification_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+        ] + [
+            option["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+            for option in feature.get("options", [])
+        ]
+
+        tool_feature_schema_ids = (
+            [
+                feature["featureSchemaId"]
+                for feature in ontology.normalized["tools"]
+            ]
+            + [
+                classification["featureSchemaId"]
+                for tool in ontology.normalized["tools"]
+                for classification in tool.get("classifications", [])
+            ]
+            + [
+                option["featureSchemaId"]
+                for tool in ontology.normalized["tools"]
+                for classification in tool.get("classifications", [])
+                for option in classification.get("options", [])
+            ]
+        )
+
+        feature_schema_ids = (
+            classification_feature_schema_ids + tool_feature_schema_ids
+        )
+
+        client.delete_unused_ontology(ontology_id)
+        for feature_schema_id in feature_schema_ids:
+            try:
+                client.delete_unused_feature_schema(feature_schema_id)
+            except LabelboxError as e:
+                print(
+                    f"Failed to delete feature schema {feature_schema_id}: {e}"
+                )
+
+
+class ModuleTearDownHelpers(TearDownHelpers): ...
+
+
+@pytest.fixture
+def teardown_helpers():
+    return TearDownHelpers()
+
+
+@pytest.fixture(scope="module")
+def module_teardown_helpers():
+    return TearDownHelpers()
diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py
index 39cede0bb..6543f54bf 100644
--- a/libs/labelbox/tests/data/annotation_import/conftest.py
+++ b/libs/labelbox/tests/data/annotation_import/conftest.py
@@ -1,4 +1,3 @@
-import itertools
 import uuid
 
 from labelbox.schema.model_run import ModelRun
@@ -14,7 +13,6 @@
 from typing import Tuple, Type
 from labelbox.schema.annotation_import import LabelImport, AnnotationImportState
 from pytest import FixtureRequest
-from contextlib import suppress
 
 """
 The main fixtures of this library are configured_project and configured_project_by_global_key. Both fixtures generate data rows with a parametrize media type. They create the amount of data rows equal to the DATA_ROW_COUNT variable below.
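A minimal sketch of how a test module can consume the teardown helper above, assuming the client and rand_gen fixtures from this conftest are in scope; the my_project fixture name is illustrative:

import pytest

from labelbox.schema.media_type import MediaType
from labelbox.schema.queue_mode import QueueMode


@pytest.fixture
def my_project(client, rand_gen, teardown_helpers):
    # Create a disposable project for a single test.
    project = client.create_project(
        name=rand_gen(str),
        queue_mode=QueueMode.Batch,
        media_type=MediaType.Image,
    )
    yield project
    # Releases labels, the project, then its ontology and feature schemas.
    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)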
The data rows are generated with a factory fixture that returns a function that allows you to pass a global key. The ontologies are generated normalized and based on the MediaType given (i.e. only features supported by MediaType are created). This ontology is later used to obtain the correct annotations with the prediction_id_mapping and corresponding inferences. Each data row will have all possible annotations attached supported for the MediaType. @@ -719,7 +717,6 @@ def _create_project( ) project.connect_ontology(ontology) - data_row_data = [] for _ in range(DATA_ROW_COUNT): @@ -752,6 +749,7 @@ def configured_project( normalized_ontology_by_media_type, export_v2_test_helpers, llm_prompt_response_creation_dataset_with_data_row, + teardown_helpers, ): """Configure project for test. Request.param will contain the media type if not present will use Image MediaType. The project will have 10 data rows.""" @@ -789,13 +787,11 @@ def configured_project( yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) if dataset: dataset.delete() - client.delete_unused_ontology(ontology.uid) - @pytest.fixture() def configured_project_by_global_key( @@ -805,6 +801,7 @@ def configured_project_by_global_key( request: FixtureRequest, normalized_ontology_by_media_type, export_v2_test_helpers, + teardown_helpers, ): """Does the same thing as configured project but with global keys focus.""" @@ -841,13 +838,11 @@ def configured_project_by_global_key( yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) if dataset: dataset.delete() - client.delete_unused_ontology(ontology.uid) - @pytest.fixture(scope="module") def module_project( @@ -856,6 +851,7 @@ def module_project( data_row_json_by_media_type, request: FixtureRequest, normalized_ontology_by_media_type, + module_teardown_helpers, ): """Generates a image project that scopes to the test module(file). 
Used to reduce api calls.""" @@ -889,13 +885,13 @@ def module_project( yield project - project.delete() + module_teardown_helpers.teardown_project_labels_ontology_feature_schemas( + project + ) if dataset: dataset.delete() - client.delete_unused_ontology(ontology.uid) - @pytest.fixture def prediction_id_mapping(request, normalized_ontology_by_media_type): diff --git a/libs/labelbox/tests/data/annotation_import/test_model_run.py b/libs/labelbox/tests/data/annotation_import/test_model_run.py index 9eca28429..1174115c5 100644 --- a/libs/labelbox/tests/data/annotation_import/test_model_run.py +++ b/libs/labelbox/tests/data/annotation_import/test_model_run.py @@ -7,13 +7,23 @@ from labelbox import DataSplit, ModelRun -@pytest.mark.order(1) -def test_model_run(client, configured_project_with_label, data_row, rand_gen): +@pytest.fixture +def current_model(client, configured_project_with_label, rand_gen): project, _, _, label = configured_project_with_label - label_id = label.uid ontology = project.ontology() - data = {"name": rand_gen(str), "ontology_id": ontology.uid} - model = client.create_model(data["name"], data["ontology_id"]) + + model = client.create_model(rand_gen(str), ontology.uid) + yield model + + model.delete() + + +def test_model_run( + client, configured_project_with_label, current_model, data_row, rand_gen +): + _, _, _, label = configured_project_with_label + label_id = label.uid + model = current_model name = rand_gen(str) config = {"batch_size": 100, "reruns": None} diff --git a/libs/labelbox/tests/data/export/conftest.py b/libs/labelbox/tests/data/export/conftest.py index 0836c2b9e..0a62f39c8 100644 --- a/libs/labelbox/tests/data/export/conftest.py +++ b/libs/labelbox/tests/data/export/conftest.py @@ -2,7 +2,6 @@ import time import pytest from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.media_type import MediaType from labelbox.schema.labeling_frontend import LabelingFrontend from labelbox.schema.annotation_import import LabelImport, AnnotationImportState @@ -242,7 +241,7 @@ def polygon_inference(prediction_id_mapping): @pytest.fixture def configured_project_with_ontology( - client, initial_dataset, ontology, rand_gen, image_url + client, initial_dataset, ontology, rand_gen, image_url, teardown_helpers ): dataset = initial_dataset project = client.create_project( @@ -264,11 +263,13 @@ def configured_project_with_ontology( ) project.data_row_ids = data_row_ids yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture -def configured_project_without_data_rows(client, ontology, rand_gen): +def configured_project_without_data_rows( + client, ontology, rand_gen, teardown_helpers +): project = client.create_project( name=rand_gen(str), description=rand_gen(str), @@ -279,7 +280,7 @@ def configured_project_without_data_rows(client, ontology, rand_gen): )[0] project.setup(editor, ontology) yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture diff --git a/libs/labelbox/tests/data/test_data_row_metadata.py b/libs/labelbox/tests/data/test_data_row_metadata.py index 9a3690776..891cab9be 100644 --- a/libs/labelbox/tests/data/test_data_row_metadata.py +++ b/libs/labelbox/tests/data/test_data_row_metadata.py @@ -92,21 +92,6 @@ def make_named_metadata(dr_id) -> DataRowMetadata: return metadata -@pytest.mark.skip(reason="broken export v1 api, to be retired soon") -def test_export_empty_metadata( - client, 
configured_project_with_label, wait_for_data_row_processing -): - project, _, data_row, _ = configured_project_with_label - data_row = wait_for_data_row_processing(client, data_row) - - export_task = project.export(params={"metadata_fields": True}) - export_task.wait_till_done() - stream = export_task.get_buffered_stream() - data_row = [data_row.json for data_row in stream][0] - - assert data_row["metadata_fields"] == [] - - def test_bulk_export_datarow_metadata(data_row, mdo: DataRowMetadataOntology): metadata = make_metadata(data_row.uid) mdo.bulk_upsert([metadata]) diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index d37287fe8..c917a6164 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -113,7 +113,7 @@ def configured_project( @pytest.fixture def configured_project_with_complex_ontology( - client, initial_dataset, rand_gen, image_url + client, initial_dataset, rand_gen, image_url, teardown_helpers ): project = client.create_project( name=rand_gen(str), @@ -178,7 +178,7 @@ def configured_project_with_complex_ontology( project.setup(editor, ontology.asdict()) yield [project, data_row] - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture diff --git a/libs/labelbox/tests/integration/test_feature_schema.py b/libs/labelbox/tests/integration/test_feature_schema.py index 1dc940f08..46ec8c067 100644 --- a/libs/labelbox/tests/integration/test_feature_schema.py +++ b/libs/labelbox/tests/integration/test_feature_schema.py @@ -58,9 +58,8 @@ def test_throws_an_error_if_feature_schema_to_delete_doesnt_exist(client): client.delete_unused_feature_schema("doesntexist") -def test_updates_a_feature_schema_title(client): - tool = client.upsert_feature_schema(point.asdict()) - feature_schema_id = tool.normalized["featureSchemaId"] +def test_updates_a_feature_schema_title(client, feature_schema): + feature_schema_id = feature_schema.normalized["featureSchemaId"] new_title = "new title" updated_feature_schema = client.update_feature_schema_title( feature_schema_id, new_title @@ -68,20 +67,16 @@ def test_updates_a_feature_schema_title(client): assert updated_feature_schema.normalized["name"] == new_title - client.delete_unused_feature_schema(feature_schema_id) - def test_throws_an_error_when_updating_a_feature_schema_with_empty_title( - client, + client, feature_schema ): - tool = client.upsert_feature_schema(point.asdict()) + tool = feature_schema feature_schema_id = tool.normalized["featureSchemaId"] with pytest.raises(Exception): client.update_feature_schema_title(feature_schema_id, "") - client.delete_unused_feature_schema(feature_schema_id) - def test_throws_an_error_when_updating_not_existing_feature_schema(client): with pytest.raises(Exception): @@ -107,8 +102,8 @@ def test_updates_a_feature_schema(client, feature_schema): assert updated_feature_schema.normalized["name"] == "new name" -def test_does_not_include_used_feature_schema(client): - tool = client.upsert_feature_schema(point.asdict()) +def test_does_not_include_used_feature_schema(client, feature_schema): + tool = feature_schema feature_schema_id = tool.normalized["featureSchemaId"] ontology = client.create_ontology_from_feature_schemas( name="ontology name", @@ -120,4 +115,3 @@ def test_does_not_include_used_feature_schema(client): assert feature_schema_id not in unused_feature_schemas client.delete_unused_ontology(ontology.uid) - 
client.delete_unused_feature_schema(feature_schema_id) diff --git a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py index 8ecdef2f1..061efbadf 100644 --- a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py +++ b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py @@ -5,23 +5,23 @@ def test_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count is None @@ -29,23 +29,23 @@ def test_no_tasks_remaining_count(): def test_tasks_remaining_count_exists(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 1, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 1, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 1 @@ -53,23 +53,23 @@ def test_tasks_remaining_count_exists(): def test_tasks_total_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 1, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 1, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + 
"boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 1, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 1, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 0 From 51ecfeab2efa15402d949b5799e21f77ea26ee95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20J=C3=B3=C5=BAwiak?= Date: Mon, 9 Sep 2024 15:24:35 +0200 Subject: [PATCH 02/22] [PTDT-2553] Added integration tests for MMC MAL/GT imports --- .../tests/data/annotation_import/conftest.py | 495 +++++++++++++++++- .../test_generic_data_types.py | 6 + 2 files changed, 500 insertions(+), 1 deletion(-) diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py index 6543f54bf..2342a759a 100644 --- a/libs/labelbox/tests/data/annotation_import/conftest.py +++ b/libs/labelbox/tests/data/annotation_import/conftest.py @@ -1,4 +1,5 @@ import uuid +from typing import Union from labelbox.schema.model_run import ModelRun from labelbox.schema.ontology import Ontology @@ -152,6 +153,22 @@ def llm_human_preference_data_row(global_key): return llm_human_preference_data_row +@pytest.fixture(scope="module") +def mmc_data_row_url(): + return "https://storage.googleapis.com/labelbox-datasets/conversational_model_evaluation_sample/offline-model-chat-evaluation.json" + + +@pytest.fixture(scope="module", autouse=True) +def offline_model_evaluation_data_row_factory(mmc_data_row_url: str): + def offline_model_evaluation_data_row(global_key: str): + return { + "row_data": mmc_data_row_url, + "global_key": global_key, + } + + return offline_model_evaluation_data_row + + @pytest.fixture(scope="module", autouse=True) def data_row_json_by_media_type( audio_data_row_factory, @@ -163,6 +180,7 @@ def data_row_json_by_media_type( document_data_row_factory, text_data_row_factory, video_data_row_factory, + offline_model_evaluation_data_row_factory, ): return { MediaType.Audio: audio_data_row_factory, @@ -174,6 +192,7 @@ def data_row_json_by_media_type( MediaType.Document: document_data_row_factory, MediaType.Text: text_data_row_factory, MediaType.Video: video_data_row_factory, + OntologyKind.ModelEvaluation: offline_model_evaluation_data_row_factory, } @@ -345,6 +364,26 @@ def normalized_ontology_by_media_type(): ], } + radio_index = { + "required": False, + "instructions": "radio_index", + "name": "radio_index", + "type": "radio", + "scope": "index", + "options": [ + { + "label": "first_radio_answer", + "value": "first_radio_answer", + "options": [], + }, + { + "label": "second_radio_answer", + "value": "second_radio_answer", + "options": [], + }, + ], + } + prompt_text = { "instructions": "prompt-text", "name": "prompt-text", @@ -403,6 +442,27 @@ def normalized_ontology_by_media_type(): "type": "response-text", } + message_single_selection_task = { + "required": False, + "name": "message-single-selection", + "tool": "message-single-selection", + "classifications": [], + } + + message_multi_selection_task = { + "required": False, + "name": "message-multi-selection", + "tool": "message-multi-selection", + "classifications": [], + } + + message_ranking_task = { + "required": False, + "name": "message-ranking", + "tool": "message-ranking", + "classifications": [], + } + return { MediaType.Image: { "tools": [ @@ -516,6 +576,21 @@ def 
normalized_ontology_by_media_type(): response_checklist, ], }, + OntologyKind.ModelEvaluation: { + "tools": [ + message_single_selection_task, + message_multi_selection_task, + message_ranking_task, + ], + "classifications": [ + radio, + checklist, + free_form_text, + radio_index, + checklist_index, + free_form_text_index, + ], + }, "all": { "tools": [ bbox_tool, @@ -695,6 +770,45 @@ def _create_prompt_response_project( return prompt_response_project, ontology +def _create_offline_mmc_project( + client: Client, rand_gen, data_row_json, normalized_ontology +) -> Tuple[Project, Ontology, Dataset]: + dataset = client.create_dataset(name=rand_gen(str)) + + project = client.create_offline_model_evaluation_project( + name=f"offline-mmc-{rand_gen(str)}", + ) + + ontology = client.create_ontology( + name=f"offline-mmc-{rand_gen(str)}", + normalized=normalized_ontology, + media_type=MediaType.Conversational, + ontology_kind=OntologyKind.ModelEvaluation, + ) + + project.connect_ontology(ontology) + + data_row_data = [ + data_row_json(rand_gen(str)) for _ in range(DATA_ROW_COUNT) + ] + + task = dataset.create_data_rows(data_row_data) + task.wait_till_done() + global_keys = [row["global_key"] for row in task.result] + data_row_ids = [row["id"] for row in task.result] + + project.create_batch( + rand_gen(str), + data_row_ids, # sample of data row objects + 5, # priority between 1(Highest) - 5(lowest) + ) + project.data_row_ids = data_row_ids + project.data_row_data = data_row_data + project.global_keys = global_keys + + return project, ontology, dataset + + def _create_project( client: Client, rand_gen, @@ -753,7 +867,10 @@ def configured_project( ): """Configure project for test. Request.param will contain the media type if not present will use Image MediaType. 
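A hedged sketch of how a test opts into a particular media type through indirect parametrization of this fixture; the test body is hypothetical, while the parameter values mirror the MMC tests added in this patch:

import pytest

from labelbox.schema.media_type import MediaType
from labelbox.schema.ontology_kind import OntologyKind


@pytest.mark.parametrize(
    "configured_project",
    [MediaType.Image, OntologyKind.ModelEvaluation],
    indirect=["configured_project"],
)
def test_project_has_data_rows(configured_project):
    # The fixture attaches the created data row ids before yielding.
    assert len(configured_project.data_row_ids) > 0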
The project will have 10 data rows.""" - media_type = getattr(request, "param", MediaType.Image) + media_type: Union[MediaType, OntologyKind] = getattr( + request, "param", MediaType.Image + ) + dataset = None if ( @@ -776,6 +893,13 @@ def configured_project( media_type, normalized_ontology_by_media_type, ) + elif media_type == OntologyKind.ModelEvaluation: + project, ontology, dataset = _create_offline_mmc_project( + client, + rand_gen, + data_row_json_by_media_type[media_type], + normalized_ontology_by_media_type[media_type], + ) else: project, ontology, dataset = _create_project( client, @@ -827,6 +951,13 @@ def configured_project_by_global_key( media_type, normalized_ontology_by_media_type, ) + elif media_type == OntologyKind.ModelEvaluation: + project, ontology, dataset = _create_offline_mmc_project( + client, + rand_gen, + data_row_json_by_media_type[media_type], + normalized_ontology_by_media_type[media_type], + ) else: project, ontology, dataset = _create_project( client, @@ -988,6 +1119,31 @@ def prediction_id_mapping(request, normalized_ontology_by_media_type): return base_annotations +@pytest.fixture +def mmc_example_data_row_message_ids(mmc_data_row_url: str): + data_row_content = requests.get(mmc_data_row_url).json() + + human_id = next( + actor_id + for actor_id, actor_metadata in data_row_content["actors"].items() + if actor_metadata["role"] == "human" + ) + + return { + message_id: [ + { + "id": child_msg_id, + "model_config_name": data_row_content["actors"][ + data_row_content["messages"][child_msg_id]["actorId"] + ]["metadata"]["modelConfigName"], + } + for child_msg_id in message_metadata["childMessageIds"] + ] + for message_id, message_metadata in data_row_content["messages"].items() + if message_metadata["actorId"] == human_id + } + + # Each inference represents a feature type that adds to the base annotation created with prediction_id_mapping @pytest.fixture def polygon_inference(prediction_id_mapping): @@ -1303,6 +1459,31 @@ def checklist_inference_index(prediction_id_mapping): return checklists +@pytest.fixture +def checklist_inference_index_mmc( + prediction_id_mapping, mmc_example_data_row_message_ids +): + checklists = [] + for feature in prediction_id_mapping: + if "checklist_index" not in feature: + return None + checklist = feature["checklist_index"].copy() + checklist.update( + { + "answers": [ + {"name": "first_checklist_answer"}, + {"name": "second_checklist_answer"}, + ], + "messageId": next( + iter(mmc_example_data_row_message_ids.keys()) + ), + } + ) + del checklist["tool"] + checklists.append(checklist) + return checklists + + @pytest.fixture def prompt_text_inference(prediction_id_mapping): prompt_texts = [] @@ -1333,6 +1514,45 @@ def radio_response_inference(prediction_id_mapping): return response_radios +@pytest.fixture +def radio_inference(prediction_id_mapping): + radios = [] + for feature in prediction_id_mapping: + if "radio" not in feature: + continue + radio = feature["radio"].copy() + radio.update( + { + "answer": {"name": "first_radio_answer"}, + } + ) + del radio["tool"] + radios.append(radio) + return radios + + +@pytest.fixture +def radio_inference_index_mmc( + prediction_id_mapping, mmc_example_data_row_message_ids +): + radios = [] + for feature in prediction_id_mapping: + if "radio_index" not in feature: + continue + radio = feature["radio_index"].copy() + radio.update( + { + "answer": {"name": "first_radio_answer"}, + "messageId": next( + iter(mmc_example_data_row_message_ids.keys()) + ), + } + ) + del radio["tool"] + 
radios.append(radio) + return radios + + @pytest.fixture def checklist_response_inference(prediction_id_mapping): response_checklists = [] @@ -1402,6 +1622,28 @@ def text_inference_index(prediction_id_mapping): return texts +@pytest.fixture +def text_inference_index_mmc( + prediction_id_mapping, mmc_example_data_row_message_ids +): + texts = [] + for feature in prediction_id_mapping: + if "text_index" not in feature: + continue + text = feature["text_index"].copy() + text.update( + { + "answer": "free form text...", + "messageId": next( + iter(mmc_example_data_row_message_ids.keys()) + ), + } + ) + del text["tool"] + texts.append(text) + return texts + + @pytest.fixture def video_checklist_inference(prediction_id_mapping): checklists = [] @@ -1437,6 +1679,118 @@ def video_checklist_inference(prediction_id_mapping): return checklists +@pytest.fixture +def message_single_selection_inference( + prediction_id_mapping, mmc_example_data_row_message_ids +): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + res = [] + for feature in prediction_id_mapping: + if "message-single-selection" not in feature: + continue + selection = feature["message-single-selection"].copy() + selection.update( + { + "messageEvaluationTask": { + "format": "message-single-selection", + "data": { + "messageId": some_child_ids[0]["id"], + "parentMessageId": some_parent_id, + "modelConfigName": some_child_ids[0][ + "model_config_name" + ], + }, + } + } + ) + del selection["tool"] + res.append(selection) + + return res + + +@pytest.fixture +def message_multi_selection_inference( + prediction_id_mapping, mmc_example_data_row_message_ids +): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + res = [] + for feature in prediction_id_mapping: + if "message-multi-selection" not in feature: + continue + selection = feature["message-multi-selection"].copy() + selection.update( + { + "messageEvaluationTask": { + "format": "message-multi-selection", + "data": { + "parentMessageId": some_parent_id, + "selectedMessages": [ + { + "messageId": child_id["id"], + "modelConfigName": child_id[ + "model_config_name" + ], + } + for child_id in some_child_ids + ], + }, + } + } + ) + del selection["tool"] + res.append(selection) + + return res + + +@pytest.fixture +def message_ranking_inference( + prediction_id_mapping, mmc_example_data_row_message_ids +): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + res = [] + for feature in prediction_id_mapping: + if "message-ranking" not in feature: + continue + selection = feature["message-ranking"].copy() + selection.update( + { + "messageEvaluationTask": { + "format": "message-ranking", + "data": { + "parentMessageId": some_parent_id, + "rankedMessages": [ + { + "messageId": child_id["id"], + "modelConfigName": child_id[ + "model_config_name" + ], + "order": idx, + } + for idx, child_id in enumerate( + some_child_ids, start=1 + ) + ], + }, + } + } + ) + del selection["tool"] + res.append(selection) + + return res + + @pytest.fixture def annotations_by_media_type( polygon_inference, @@ -1456,6 +1810,13 @@ def annotations_by_media_type( checklist_response_inference, radio_response_inference, text_response_inference, + message_single_selection_inference, + message_multi_selection_inference, + message_ranking_inference, + checklist_inference_index_mmc, + radio_inference, + radio_inference_index_mmc, + text_inference_index_mmc, ): return { MediaType.Audio: 
[checklist_inference, text_inference], @@ -1493,6 +1854,17 @@ def annotations_by_media_type( checklist_response_inference, radio_response_inference, ], + OntologyKind.ModelEvaluation: [ + message_single_selection_inference, + message_multi_selection_inference, + message_ranking_inference, + radio_inference, + checklist_inference, + text_inference, + radio_inference_index_mmc, + checklist_inference_index_mmc, + text_inference_index_mmc, + ], } @@ -2162,6 +2534,125 @@ def expected_export_v2_llm_response_creation(): return expected_annotations +@pytest.fixture +def expected_exports_v2_mmc(mmc_example_data_row_message_ids): + some_parent_id, some_child_ids = next( + iter(mmc_example_data_row_message_ids.items()) + ) + + return { + "objects": [ + { + "name": "message-single-selection", + "annotation_kind": "MessageSingleSelection", + "classifications": [], + "selected_message": { + "message_id": some_child_ids[0]["id"], + "model_config_name": some_child_ids[0]["model_config_name"], + "parent_message_id": some_parent_id, + }, + }, + { + "name": "message-multi-selection", + "annotation_kind": "MessageMultiSelection", + "classifications": [], + "selected_messages": { + "messages": [ + { + "message_id": child_id["id"], + "model_config_name": child_id["model_config_name"], + } + for child_id in some_child_ids + ], + "parent_message_id": some_parent_id, + }, + }, + { + "name": "message-ranking", + "annotation_kind": "MessageRanking", + "classifications": [], + "ranked_messages": { + "ranked_messages": [ + { + "message_id": child_id["id"], + "model_config_name": child_id["model_config_name"], + "order": idx, + } + for idx, child_id in enumerate(some_child_ids, start=1) + ], + "parent_message_id": some_parent_id, + }, + }, + ], + "classifications": [ + { + "name": "radio", + "value": "radio", + "radio_answer": { + "name": "first_radio_answer", + "value": "first_radio_answer", + "classifications": [], + }, + }, + { + "name": "checklist", + "value": "checklist", + "checklist_answers": [ + { + "name": "first_checklist_answer", + "value": "first_checklist_answer", + "classifications": [], + }, + { + "name": "second_checklist_answer", + "value": "second_checklist_answer", + "classifications": [], + }, + ], + }, + { + "name": "text", + "value": "text", + "text_answer": {"content": "free form text..."}, + }, + { + "name": "radio_index", + "value": "radio_index", + "message_id": some_parent_id, + "conversational_radio_answer": { + "name": "first_radio_answer", + "value": "first_radio_answer", + "classifications": [], + }, + }, + { + "name": "checklist_index", + "value": "checklist_index", + "message_id": some_parent_id, + "conversational_checklist_answers": [ + { + "name": "first_checklist_answer", + "value": "first_checklist_answer", + "classifications": [], + }, + { + "name": "second_checklist_answer", + "value": "second_checklist_answer", + "classifications": [], + }, + ], + }, + { + "name": "text_index", + "value": "text_index", + "message_id": some_parent_id, + "conversational_text_answer": {"content": "free form text..."}, + }, + ], + "relationships": [], + } + + @pytest.fixture def exports_v2_by_media_type( expected_export_v2_image, @@ -2175,6 +2666,7 @@ def exports_v2_by_media_type( expected_export_v2_llm_prompt_response_creation, expected_export_v2_llm_prompt_creation, expected_export_v2_llm_response_creation, + expected_exports_v2_mmc, ): return { MediaType.Image: expected_export_v2_image, @@ -2188,6 +2680,7 @@ def exports_v2_by_media_type( MediaType.LLMPromptResponseCreation: 
expected_export_v2_llm_prompt_response_creation,
        MediaType.LLMPromptCreation: expected_export_v2_llm_prompt_creation,
        OntologyKind.ResponseCreation: expected_export_v2_llm_response_creation,
+        OntologyKind.ModelEvaluation: expected_exports_v2_mmc,
    }
diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
index f8f0c449a..9de67bd4e 100644
--- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
+++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
@@ -41,6 +41,7 @@ def validate_iso_format(date_string: str):
         (MediaType.LLMPromptResponseCreation, GenericDataRowData),
         (MediaType.LLMPromptCreation, GenericDataRowData),
         (OntologyKind.ResponseCreation, GenericDataRowData),
+        (OntologyKind.ModelEvaluation, GenericDataRowData),
     ],
 )
 def test_generic_data_row_type_by_data_row_id(
@@ -76,6 +77,7 @@
         # (MediaType.LLMPromptResponseCreation, GenericDataRowData),
         # (MediaType.LLMPromptCreation, GenericDataRowData),
         (OntologyKind.ResponseCreation, GenericDataRowData),
+        (OntologyKind.ModelEvaluation, GenericDataRowData),
     ],
 )
 def test_generic_data_row_type_by_global_key(
@@ -115,6 +117,7 @@
         ),
         (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation),
         (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
     ],
     indirect=["configured_project"],
 )
 def test_import_media_types(
@@ -191,6 +194,7 @@
         (MediaType.Document, MediaType.Document),
         (MediaType.Dicom, MediaType.Dicom),
         (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
     ],
     indirect=["configured_project_by_global_key"],
 )
 def test_import_media_types_by_global_key(
@@ -275,6 +279,7 @@
         ),
         (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation),
         (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
     ],
     indirect=["configured_project"],
 )
 def test_import_mal_annotations(
@@ -309,6 +314,7 @@
         (MediaType.Document, MediaType.Document),
         (MediaType.Dicom, MediaType.Dicom),
         (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
     ],
     indirect=["configured_project_by_global_key"],
 )

From 24e07661a77f60190a31e0ee6077e04b65a373fe Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Tue, 17 Sep 2024 10:57:13 -0700
Subject: [PATCH 03/22] SDK release v.5.0.0 prep (#1823)

---
 docs/conf.py                           |  2 +-
 libs/labelbox/CHANGELOG.md             | 10 ++++++++++
 libs/labelbox/pyproject.toml           |  2 +-
 libs/labelbox/src/labelbox/__init__.py |  2 +-
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index b9870b87a..a67a44a24 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -16,7 +16,7 @@
 project = 'Python SDK reference'
 copyright = '2024, Labelbox'
 author = 'Labelbox'
-release = '4.0.0'
+release = '5.0.0'
 
 # -- General configuration ---------------------------------------------------
 
diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md
index ae97086c6..b2d41b56d 100644
--- a/libs/labelbox/CHANGELOG.md
+++ b/libs/labelbox/CHANGELOG.md
@@ -1,4 +1,14 @@
 # Changelog
+# Version 5.0.0 (2024-09-16)
+## Updated
+* Set tasks_remaining_count to None in LabelingServiceDashboard if labeling has not started ([#1817](https://github.com/Labelbox/labelbox-python/pull/1817))
+* Improve error messaging when creating an LLM project with an invalid dataset id parameter ([#1799](https://github.com/Labelbox/labelbox-python/pull/1799))
+## Removed
+* BREAKING CHANGE SDK methods for exports v1 ([#1800](https://github.com/Labelbox/labelbox-python/pull/1800))
+* BREAKING CHANGE Unused labelbox_v1 serialization package ([#1803](https://github.com/Labelbox/labelbox-python/pull/1803))
+## Fixed
+* Cuid dependencies that cause a crash if numpy is not installed ([#1807](https://github.com/Labelbox/labelbox-python/pull/1807))
+
 # Version 4.0.0 (2024-09-10)
 ## Added
 * BREAKING CHANGE for pydantic V1 users: Converted SDK to use pydantic V2([#1738](https://github.com/Labelbox/labelbox-python/pull/1738))
diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml
index 58ce3410a..f4c24af59 100644
--- a/libs/labelbox/pyproject.toml
+++ b/libs/labelbox/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "labelbox"
-version = "4.0.0"
+version = "5.0.0"
 description = "Labelbox Python API"
 authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }]
 dependencies = [
diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py
index 633e8f4c2..5b5ac1f67 100644
--- a/libs/labelbox/src/labelbox/__init__.py
+++ b/libs/labelbox/src/labelbox/__init__.py
@@ -1,6 +1,6 @@
 name = "labelbox"
 
-__version__ = "4.0.0"
+__version__ = "5.0.0"
 
 from labelbox.client import Client
 from labelbox.schema.project import Project

From 2faf9a10c068621e3a58a690b1dbbddbce0c0f25 Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Wed, 18 Sep 2024 09:01:28 -0700
Subject: [PATCH 04/22] Vb/merge 5.0.0 (#1826)

Co-authored-by: Gabe <33893811+Gabefire@users.noreply.github.com>
---
 libs/labelbox/src/labelbox/__init__.py        |    1 -
 libs/labelbox/src/labelbox/orm/model.py       |    1 -
 libs/labelbox/src/labelbox/schema/__init__.py |   21 +-
 .../labelbox/schema/bulk_import_request.py    | 1004 -----------------
 libs/labelbox/src/labelbox/schema/enums.py    |   25 -
 libs/labelbox/src/labelbox/schema/project.py  |  120 +-
 .../test_bulk_import_request.py               |  258 -----
 .../test_ndjson_validation.py                 |   53 +-
 .../classification_import_global_key.json     |   54 -
 ...conversation_entity_import_global_key.json |   25 -
 .../data/assets/ndjson/image_import.json      |  779 +------------
 .../ndjson/image_import_global_key.json       |  823 --------------
 .../assets/ndjson/image_import_name_only.json |  810 +------------
 .../ndjson/metric_import_global_key.json      |   10 -
 .../assets/ndjson/pdf_import_global_key.json  |  155 ---
 .../ndjson/polyline_import_global_key.json    |   36 -
 .../ndjson/text_entity_import_global_key.json |   26 -
 .../ndjson/video_import_global_key.json       |  166 ---
 .../serialization/ndjson/test_checklist.py    |   26 -
 .../ndjson/test_classification.py             |  108 +-
 .../serialization/ndjson/test_conversation.py |   71 +-
 .../serialization/ndjson/test_data_gen.py     |   80 +-
 .../data/serialization/ndjson/test_dicom.py   |   26 -
 .../serialization/ndjson/test_document.py     |  294 ++++-
 .../ndjson/test_export_video_objects.py       |   32 +-
 .../serialization/ndjson/test_free_text.py    |   26 -
 .../serialization/ndjson/test_global_key.py   |  125 +-
 .../data/serialization/ndjson/test_image.py   |  203 +++-
 .../data/serialization/ndjson/test_metric.py  |  170 ++-
 .../data/serialization/ndjson/test_mmc.py     |  125 +-
 .../ndjson/test_ndlabel_subclass_matching.py  |   19 -
 .../data/serialization/ndjson/test_nested.py  |  236 +++-
 .../serialization/ndjson/test_polyline.py     |   82 +-
 .../data/serialization/ndjson/test_radio.py   |   16 -
 .../serialization/ndjson/test_rectangle.py    |   43 +-
 .../serialization/ndjson/test_relationship.py |  151 ++-
 .../data/serialization/ndjson/test_text.py    |   10 -
 .../serialization/ndjson/test_text_entity.py  |   69 +-
 .../data/serialization/ndjson/test_video.py   |  868 +++++++++++++-
 39 files changed, 2380 insertions(+), 4767 deletions(-)
 delete mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py
 delete mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py
 delete mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json
 delete mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json
 delete mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json
 delete mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json
 delete mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json
 delete mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json
 delete mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json
 delete mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json
 delete mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py

diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py
index 5b5ac1f67..f9b82b422 100644
--- a/libs/labelbox/src/labelbox/__init__.py
+++ b/libs/labelbox/src/labelbox/__init__.py
@@ -6,7 +6,6 @@
 from labelbox.schema.project import Project
 from labelbox.schema.model import Model
 from labelbox.schema.model_config import ModelConfig
-from labelbox.schema.bulk_import_request import BulkImportRequest
 from labelbox.schema.annotation_import import (
     MALPredictionImport,
     MEAPredictionImport,
diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py
index 84dcac774..1f3ee1d86 100644
--- a/libs/labelbox/src/labelbox/orm/model.py
+++ b/libs/labelbox/src/labelbox/orm/model.py
@@ -386,7 +386,6 @@ class Entity(metaclass=EntityMeta):
     Review: Type[labelbox.Review]
     User: Type[labelbox.User]
     LabelingFrontend: Type[labelbox.LabelingFrontend]
-    BulkImportRequest: Type[labelbox.BulkImportRequest]
     Benchmark: Type[labelbox.Benchmark]
     IAMIntegration: Type[labelbox.IAMIntegration]
     LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions]
diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py
index 03327e0d1..e57c04a29 100644
--- a/libs/labelbox/src/labelbox/schema/__init__.py
+++ b/libs/labelbox/src/labelbox/schema/__init__.py
@@ -1,29 +1,28 @@
-import labelbox.schema.asset_attachment
-import labelbox.schema.bulk_import_request
 import labelbox.schema.annotation_import
+import labelbox.schema.asset_attachment
+import labelbox.schema.batch
 import labelbox.schema.benchmark
+import labelbox.schema.catalog
 import labelbox.schema.data_row
+import labelbox.schema.data_row_metadata
 import labelbox.schema.dataset
+import labelbox.schema.iam_integration
+import labelbox.schema.identifiable
+import labelbox.schema.identifiables
 import labelbox.schema.invite
 import labelbox.schema.label
 import labelbox.schema.labeling_frontend
 import labelbox.schema.labeling_service
+import labelbox.schema.media_type
 import labelbox.schema.model
 import labelbox.schema.model_run
 import labelbox.schema.ontology
+import labelbox.schema.ontology_kind
 import labelbox.schema.organization
 import labelbox.schema.project
+import labelbox.schema.project_overview import labelbox.schema.review import labelbox.schema.role import labelbox.schema.task import labelbox.schema.user import labelbox.schema.webhook -import labelbox.schema.data_row_metadata -import labelbox.schema.batch -import labelbox.schema.iam_integration -import labelbox.schema.media_type -import labelbox.schema.identifiables -import labelbox.schema.identifiable -import labelbox.schema.catalog -import labelbox.schema.ontology_kind -import labelbox.schema.project_overview diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py deleted file mode 100644 index 8e11f3261..000000000 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ /dev/null @@ -1,1004 +0,0 @@ -import json -import time -from uuid import UUID, uuid4 -import functools - -import logging -from pathlib import Path -from google.api_core import retry -from labelbox import parser -import requests -from pydantic import ( - ValidationError, - BaseModel, - Field, - field_validator, - model_validator, - ConfigDict, - StringConstraints, -) -from typing_extensions import Literal, Annotated -from typing import ( - Any, - List, - Optional, - BinaryIO, - Dict, - Iterable, - Tuple, - Union, - Type, - Set, - TYPE_CHECKING, -) - -from labelbox import exceptions as lb_exceptions -from labelbox import utils -from labelbox.orm import query -from labelbox.orm.db_object import DbObject -from labelbox.orm.model import Relationship -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.serialization import serialize_labels -from labelbox.orm.model import Field as lb_Field - -if TYPE_CHECKING: - from labelbox import Project - from labelbox.types import Label - -NDJSON_MIME_TYPE = "application/x-ndjson" -logger = logging.getLogger(__name__) - -# TODO: Deprecate this library in place of labelimport and malprediction import library. - - -def _determinants(parent_cls: Any) -> List[str]: - return [ - k - for k, v in parent_cls.model_fields.items() - if v.json_schema_extra and "determinant" in v.json_schema_extra - ] - - -def _make_file_name(project_id: str, name: str) -> str: - return f"{project_id}__{name}.ndjson" - - -# TODO(gszpak): move it to client.py -def _make_request_data( - project_id: str, name: str, content_length: int, file_name: str -) -> dict: - query_str = """mutation createBulkImportRequestFromFilePyApi( - $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) 
{ - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - filePayload: { - file: $file, - contentLength: $contentLength - } - }) { - %s - } - } - """ % query.results_query_part(BulkImportRequest) - variables = { - "projectId": project_id, - "name": name, - "file": None, - "contentLength": content_length, - } - operations = json.dumps({"variables": variables, "query": query_str}) - - return { - "operations": operations, - "map": (None, json.dumps({file_name: ["variables.file"]})), - } - - -def _send_create_file_command( - client, - request_data: dict, - file_name: str, - file_data: Tuple[str, Union[bytes, BinaryIO], str], -) -> dict: - response = client.execute(data=request_data, files={file_name: file_data}) - - if not response.get("createBulkImportRequest", None): - raise lb_exceptions.LabelboxError( - "Failed to create BulkImportRequest, message: %s" - % response.get("errors", None) - or response.get("error", None) - ) - - return response - - -class BulkImportRequest(DbObject): - """Represents the import job when importing annotations. - - Attributes: - name (str) - state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) - input_file_url (str): URL to your web-hosted NDJSON file - error_file_url (str): NDJSON that contains error messages for failed annotations - status_file_url (str): NDJSON that contains status for each annotation - created_at (datetime): UTC timestamp for date BulkImportRequest was created - - project (Relationship): `ToOne` relationship to Project - created_by (Relationship): `ToOne` relationship to User - """ - - name = lb_Field.String("name") - state = lb_Field.Enum(BulkImportRequestState, "state") - input_file_url = lb_Field.String("input_file_url") - error_file_url = lb_Field.String("error_file_url") - status_file_url = lb_Field.String("status_file_url") - created_at = lb_Field.DateTime("created_at") - - project = Relationship.ToOne("Project") - created_by = Relationship.ToOne("User", False, "created_by") - - @property - def inputs(self) -> List[Dict[str, Any]]: - """ - Inputs for each individual annotation uploaded. - This should match the ndjson annotations that you have uploaded. - - Returns: - Uploaded ndjson. - - * This information will expire after 24 hours. - """ - return self._fetch_remote_ndjson(self.input_file_url) - - @property - def errors(self) -> List[Dict[str, Any]]: - """ - Errors for each individual annotation uploaded. This is a subset of statuses - - Returns: - List of dicts containing error messages. Empty list means there were no errors - See `BulkImportRequest.statuses` for more details. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.error_file_url) - - @property - def statuses(self) -> List[Dict[str, Any]]: - """ - Status for each individual annotation uploaded. - - Returns: - A status for each annotation if the upload is done running. - See below table for more details - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - Field - - Description - * - uuid - - Specifies the annotation for the status row. - * - dataRow - - JSON object containing the Labelbox data row ID for the annotation. - * - status - - Indicates SUCCESS or FAILURE. - * - errors - - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. - - * This information will expire after 24 hours. 
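For comparison, a small sketch of the same polling pattern against MALPredictionImport, the replacement this module's deprecation note points to; the project id and import name are placeholders, and the from_name signature is assumed from the annotation_import module:

from labelbox import Client, MALPredictionImport

client = Client(api_key="<API_KEY>")
upload = MALPredictionImport.from_name(
    client, parent_id="<PROJECT_ID>", name="<IMPORT_NAME>"
)
upload.wait_until_done()
# Each status row reports uuid, dataRow, status, and any errors.
failures = [s for s in upload.statuses if s["status"] == "FAILURE"]
print(f"{len(failures)} annotations failed", upload.errors)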
- """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.status_file_url) - - @functools.lru_cache() - def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: - """ - Fetches the remote ndjson file and caches the results. - - Args: - url (str): Can be any url pointing to an ndjson file. - Returns: - ndjson as a list of dicts. - """ - response = requests.get(url) - response.raise_for_status() - return parser.loads(response.text) - - def refresh(self) -> None: - """Synchronizes values of all fields with the database.""" - query_str, params = query.get_single(BulkImportRequest, self.uid) - res = self.client.execute(query_str, params) - res = res[utils.camel_case(BulkImportRequest.type_name())] - self._set_field_values(res) - - def wait_till_done(self, sleep_time_seconds: int = 5) -> None: - self.wait_until_done(sleep_time_seconds) - - def wait_until_done(self, sleep_time_seconds: int = 5) -> None: - """Blocks import job until certain conditions are met. - - Blocks until the BulkImportRequest.state changes either to - `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, - periodically refreshing object's state. - - Args: - sleep_time_seconds (str): a time to block between subsequent API calls - """ - while self.state == BulkImportRequestState.RUNNING: - logger.info(f"Sleeping for {sleep_time_seconds} seconds...") - time.sleep(sleep_time_seconds) - self.__exponential_backoff_refresh() - - @retry.Retry( - predicate=retry.if_exception_type( - lb_exceptions.ApiLimitError, - lb_exceptions.TimeoutError, - lb_exceptions.NetworkError, - ) - ) - def __exponential_backoff_refresh(self) -> None: - self.refresh() - - @classmethod - def from_name( - cls, client, project_id: str, name: str - ) -> "BulkImportRequest": - """Fetches existing BulkImportRequest. - - Args: - client (Client): a Labelbox client - project_id (str): BulkImportRequest's project id - name (str): name of BulkImportRequest - Returns: - BulkImportRequest object - - """ - query_str = """query getBulkImportRequestPyApi( - $projectId: ID!, $name: String!) { - bulkImportRequest(where: { - projectId: $projectId, - name: $name - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name} - response = client.execute(query_str, params=params) - return cls(client, response["bulkImportRequest"]) - - @classmethod - def create_from_url( - cls, client, project_id: str, name: str, url: str, validate=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a publicly accessible URL - to an ndjson file with predictions. - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - url (str): publicly accessible URL pointing to ndjson file containing predictions - validate (bool): a flag indicating if there should be a validation - if `url` is valid ndjson - Returns: - BulkImportRequest object - """ - if validate: - logger.warn( - "Validation is turned on. The file will be downloaded locally and processed before uploading." - ) - res = requests.get(url) - data = parser.loads(res.text) - _validate_ndjson(data, client.get_project(project_id)) - - query_str = """mutation createBulkImportRequestPyApi( - $projectId: ID!, $name: String!, $fileUrl: String!) 
{ - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - fileUrl: $fileUrl - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name, "fileUrl": url} - bulk_import_request_response = client.execute(query_str, params=params) - return cls( - client, bulk_import_request_response["createBulkImportRequest"] - ) - - @classmethod - def create_from_objects( - cls, - client, - project_id: str, - name: str, - predictions: Union[Iterable[Dict], Iterable["Label"]], - validate=True, - ) -> "BulkImportRequest": - """ - Creates a `BulkImportRequest` from an iterable of dictionaries. - - Conforms to JSON predictions format, e.g.: - ``{ - "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", - "schemaId": "ckappz7d700gn0zbocmqkwd9i", - "dataRow": { - "id": "ck1s02fqxm8fi0757f0e6qtdc" - }, - "bbox": { - "top": 48, - "left": 58, - "height": 865, - "width": 1512 - } - }`` - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - predictions (Iterable[dict]): iterable of dictionaries representing predictions - validate (bool): a flag indicating if there should be a validation - if `predictions` is valid ndjson - Returns: - BulkImportRequest object - """ - if not isinstance(predictions, list): - raise TypeError( - f"annotations must be in a form of Iterable. Found {type(predictions)}" - ) - ndjson_predictions = serialize_labels(predictions) - - if validate: - _validate_ndjson(ndjson_predictions, client.get_project(project_id)) - - data_str = parser.dumps(ndjson_predictions) - if not data_str: - raise ValueError("annotations cannot be empty") - - data = data_str.encode("utf-8") - file_name = _make_file_name(project_id, name) - request_data = _make_request_data( - project_id, name, len(data_str), file_name - ) - file_data = (file_name, data, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, - request_data=request_data, - file_name=file_name, - file_data=file_data, - ) - - return cls(client, response_data["createBulkImportRequest"]) - - @classmethod - def create_from_local_file( - cls, client, project_id: str, name: str, file: Path, validate_file=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a local ndjson file with predictions. 
- - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - file (Path): local ndjson file with predictions - validate_file (bool): a flag indicating if there should be a validation - if `file` is a valid ndjson file - Returns: - BulkImportRequest object - - """ - file_name = _make_file_name(project_id, name) - content_length = file.stat().st_size - request_data = _make_request_data( - project_id, name, content_length, file_name - ) - - with file.open("rb") as f: - if validate_file: - reader = parser.reader(f) - # ensure that the underlying json load call is valid - # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53 - # by iterating through the file so we only store - # each line in memory rather than the entire file - try: - _validate_ndjson(reader, client.get_project(project_id)) - except ValueError: - raise ValueError(f"{file} is not a valid ndjson file") - else: - f.seek(0) - file_data = (file.name, f, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, request_data, file_name, file_data - ) - return cls(client, response_data["createBulkImportRequest"]) - - def delete(self) -> None: - """Deletes the import job and also any annotations created by this import. - - Returns: - None - """ - id_param = "bulk_request_id" - query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) { - deleteBulkImportRequest(where: {id: $%s}) { - id - name - } - }""" % (id_param, id_param) - self.client.execute(query_str, {id_param: self.uid}) - - -def _validate_ndjson( - lines: Iterable[Dict[str, Any]], project: "Project" -) -> None: - """ - Client side validation of an ndjson object. - - Does not guarentee that an upload will succeed for the following reasons: - * We are not checking the data row types which will cause the following errors to slip through - * Missing frame indices will not causes an error for videos - * Uploaded annotations for the wrong data type will pass (Eg. entity on images) - * We are not checking bounds of an asset (Eg. frame index, image height, text location) - - Args: - lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines - project (Project): id of project for which predictions will be imported - - Raises: - MALValidationError: Raise for invalid NDJson - UuidError: Duplicate UUID in upload - """ - feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas( - project.ontology() - ) - uids: Set[str] = set() - for idx, line in enumerate(lines): - try: - annotation = NDAnnotation(**line) - annotation.validate_instance( - feature_schemas_by_id, feature_schemas_by_name - ) - uuid = str(annotation.uuid) - if uuid in uids: - raise lb_exceptions.UuidError( - f"{uuid} already used in this import job, " - "must be unique for the project." - ) - uids.add(uuid) - except (ValidationError, ValueError, TypeError, KeyError) as e: - raise lb_exceptions.MALValidationError( - f"Invalid NDJson on line {idx}" - ) from e - - -# The rest of this file contains objects for MAL validation -def parse_classification(tool): - """ - Parses a classification from an ontology. 
Only radio, checklist, and text are supported for mal - - Args: - tool (dict) - - Returns: - dict - """ - if tool["type"] in ["radio", "checklist"]: - option_schema_ids = [r["featureSchemaId"] for r in tool["options"]] - option_names = [r["value"] for r in tool["options"]] - return { - "tool": tool["type"], - "featureSchemaId": tool["featureSchemaId"], - "name": tool["name"], - "options": [*option_schema_ids, *option_names], - } - elif tool["type"] == "text": - return { - "tool": tool["type"], - "name": tool["name"], - "featureSchemaId": tool["featureSchemaId"], - } - - -def get_mal_schemas(ontology): - """ - Converts a project ontology to a dict for easier lookup during ndjson validation - - Args: - ontology (Ontology) - Returns: - Dict, Dict : Useful for looking up a tool from a given feature schema id or name - """ - - valid_feature_schemas_by_schema_id = {} - valid_feature_schemas_by_name = {} - for tool in ontology.normalized["tools"]: - classifications = [ - parse_classification(classification_tool) - for classification_tool in tool["classifications"] - ] - classifications_by_schema_id = { - v["featureSchemaId"]: v for v in classifications - } - classifications_by_name = {v["name"]: v for v in classifications} - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - valid_feature_schemas_by_name[tool["name"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - for tool in ontology.normalized["classifications"]: - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = ( - parse_classification(tool) - ) - valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool) - return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name - - -class Bbox(BaseModel): - top: float - left: float - height: float - width: float - - -class Point(BaseModel): - x: float - y: float - - -class FrameLocation(BaseModel): - end: int - start: int - - -class VideoSupported(BaseModel): - # Note that frames are only allowed as top level inferences for video - frames: Optional[List[FrameLocation]] = None - - -# Base class for a special kind of union. -class SpecialUnion: - def __new__(cls, **kwargs): - return cls.build(kwargs) - - @classmethod - def __get_validators__(cls): - yield cls.build - - @classmethod - def get_union_types(cls): - if not issubclass(cls, SpecialUnion): - raise TypeError("{} must be a subclass of SpecialUnion") - - union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")] - if len(union_types) < 1: - raise TypeError( - "Class {cls} should inherit from a union of objects to build" - ) - if len(union_types) > 1: - raise TypeError( - f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}" - ) - return union_types[0].__args__[0].__args__ - - @classmethod - def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase": - """ - Checks through all objects in the union to see which matches the input data. 
- Args: - data (Union[dict, BaseModel]) : The data for constructing one of the objects in the union - raises: - KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion - ValidationError: Error while trying to construct a specific object in the union - - """ - if isinstance(data, BaseModel): - data = data.model_dump() - - top_level_fields = [] - max_match = 0 - matched = None - - for type_ in cls.get_union_types(): - determinate_fields = _determinants(type_) - top_level_fields.append(determinate_fields) - matches = sum([val in determinate_fields for val in data]) - if matches == len(determinate_fields) and matches > max_match: - max_match = matches - matched = type_ - - if matched is not None: - # These two have the exact same top level keys - if matched in [NDRadio, NDText]: - if isinstance(data["answer"], dict): - matched = NDRadio - elif isinstance(data["answer"], str): - matched = NDText - else: - raise TypeError( - f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict" - ) - return matched(**data) - else: - raise KeyError( - f"Invalid annotation. Must have one of the following keys : {top_level_fields}. Found {data}." - ) - - @classmethod - def schema(cls): - results = {"definitions": {}} - for cl in cls.get_union_types(): - schema = cl.schema() - results["definitions"].update(schema.pop("definitions")) - results[cl.__name__] = schema - return results - - -class DataRow(BaseModel): - id: str - - -class NDFeatureSchema(BaseModel): - schemaId: Optional[str] = None - name: Optional[str] = None - - @model_validator(mode="after") - def most_set_one(self): - if self.schemaId is None and self.name is None: - raise ValueError( - "Must set either schemaId or name for all feature schemas" - ) - return self - - -class NDBase(NDFeatureSchema): - ontology_type: str - uuid: UUID - dataRow: DataRow - model_config = ConfigDict(extra="forbid") - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - if self.name: - if self.name not in valid_feature_schemas_by_name: - raise ValueError( - f"Name {self.name} is not valid for the provided project's ontology." - ) - - if ( - self.ontology_type - != valid_feature_schemas_by_name[self.name]["tool"] - ): - raise ValueError( - f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}" - ) - - if self.schemaId: - if self.schemaId not in valid_feature_schemas_by_id: - raise ValueError( - f"Schema id {self.schemaId} is not valid for the provided project's ontology." 
- ) - - if ( - self.ontology_type - != valid_feature_schemas_by_id[self.schemaId]["tool"] - ): - raise ValueError( - f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}" - ) - - def validate_instance( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - self.validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - - -###### Classifications ###### - - -class NDText(NDBase): - ontology_type: Literal["text"] = "text" - answer: str = Field(json_schema_extra={"determinant": True}) - # No feature schema to check - - -class NDChecklist(VideoSupported, NDBase): - ontology_type: Literal["checklist"] = "checklist" - answers: List[NDFeatureSchema] = Field( - json_schema_extra={"determinant": True} - ) - - @field_validator("answers", mode="before") - def validate_answers(cls, value, field): - # constr not working with mypy. - if not len(value): - raise ValueError("Checklist answers should not be empty") - return value - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - # Test top level feature schema for this tool - super(NDChecklist, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - # Test the feature schemas provided to the answer field - if len( - set([answer.name or answer.schemaId for answer in self.answers]) - ) != len(self.answers): - raise ValueError( - f"Duplicated featureSchema found for checklist {self.uuid}" - ) - for answer in self.answers: - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if answer.name not in options and answer.schemaId not in options: - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {answer}" - ) - - -class NDRadio(VideoSupported, NDBase): - ontology_type: Literal["radio"] = "radio" - answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True}) - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDRadio, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if ( - self.answer.name not in options - and self.answer.schemaId not in options - ): - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {self.answer.name or self.answer.schemaId}" - ) - - -# A union with custom construction logic to improve error messages -class NDClassification( - SpecialUnion, - Type[Union[NDText, NDRadio, NDChecklist]], # type: ignore -): ... 
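The NDClassification union deleted above dispatched on each candidate type's "determinant" fields: a payload was matched to the one class whose determinant keys all appeared in the data, with a runtime check on the type of the answer value to tell NDText (a string answer) apart from NDRadio (a dict answer), since both share the same top-level key. A minimal standalone sketch of that selection logic, assuming a hypothetical pick_classification helper that is not part of the SDK:

    # Sketch of the determinant-based dispatch performed by the deleted
    # NDClassification union; the class names are stand-ins for the real models.
    from typing import Any, Dict, List

    DETERMINANTS: Dict[str, List[str]] = {
        "NDText": ["answer"],        # answer is a plain string
        "NDRadio": ["answer"],       # answer is a single {schemaId|name} dict
        "NDChecklist": ["answers"],  # answers is a list of such dicts
    }

    def pick_classification(data: Dict[str, Any]) -> str:
        """Return the candidate whose determinant keys all appear in data."""
        best, best_score = None, 0
        for name, fields in DETERMINANTS.items():
            matches = sum(key in fields for key in data)
            if matches == len(fields) and matches > best_score:
                best, best_score = name, matches
        if best is None:
            raise KeyError(f"No classification matches keys {sorted(data)}")
        # NDText and NDRadio tie on top-level keys, so disambiguate on the
        # value type, exactly as the deleted SpecialUnion.build special-cased.
        if best in ("NDText", "NDRadio"):
            best = "NDRadio" if isinstance(data["answer"], dict) else "NDText"
        return best

    assert pick_classification({"answer": "free text"}) == "NDText"
    assert pick_classification({"answer": {"name": "dog"}}) == "NDRadio"
    assert pick_classification({"answers": [{"name": "cat"}]}) == "NDChecklist"

The key-based match also explains the KeyError raised by the deleted build() when nothing fits: before validation runs, the payload's top-level keys are the only dispatch signal available.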
- - -###### Tools ###### - - -class NDBaseTool(NDBase): - classifications: List[NDClassification] = [] - - # This is indepdent of our problem - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDBaseTool, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - for classification in self.classifications: - classification.validate_feature_schemas( - valid_feature_schemas_by_name[self.name][ - "classificationsBySchemaId" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsBySchemaId" - ], - valid_feature_schemas_by_name[self.name][ - "classificationsByName" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsByName" - ], - ) - - @field_validator("classifications", mode="before") - def validate_subclasses(cls, value, field): - # Create uuid and datarow id so we don't have to define classification objects twice - # This is caused by the fact that we require these ids for top level classifications but not for subclasses - results = [] - dummy_id = "child".center(25, "_") - for row in value: - results.append( - {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())} - ) - return results - - -class NDPolygon(NDBaseTool): - ontology_type: Literal["polygon"] = "polygon" - polygon: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("polygon") - def is_geom_valid(cls, v): - if len(v) < 3: - raise ValueError( - f"A polygon must have at least 3 points to be valid. Found {v}" - ) - return v - - -class NDPolyline(NDBaseTool): - ontology_type: Literal["line"] = "line" - line: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("line") - def is_geom_valid(cls, v): - if len(v) < 2: - raise ValueError( - f"A line must have at least 2 points to be valid. Found {v}" - ) - return v - - -class NDRectangle(NDBaseTool): - ontology_type: Literal["rectangle"] = "rectangle" - bbox: Bbox = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class NDPoint(NDBaseTool): - ontology_type: Literal["point"] = "point" - point: Point = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class EntityLocation(BaseModel): - start: int - end: int - - -class NDTextEntity(NDBaseTool): - ontology_type: Literal["named-entity"] = "named-entity" - location: EntityLocation = Field(json_schema_extra={"determinant": True}) - - @field_validator("location") - def is_valid_location(cls, v): - if isinstance(v, BaseModel): - v = v.model_dump() - - if len(v) < 2: - raise ValueError( - f"A line must have at least 2 points to be valid. Found {v}" - ) - if v["start"] < 0: - raise ValueError(f"Text location must be positive. Found {v}") - if v["start"] > v["end"]: - raise ValueError( - f"Text start location must be less or equal than end. Found {v}" - ) - return v - - -class RLEMaskFeatures(BaseModel): - counts: List[int] - size: List[int] - - @field_validator("counts") - def validate_counts(cls, counts): - if not all([count >= 0 for count in counts]): - raise ValueError( - "Found negative value for counts. They should all be zero or positive" - ) - return counts - - @field_validator("size") - def validate_size(cls, size): - if len(size) != 2: - raise ValueError( - f"Mask `size` should have two ints representing height and with. 
Found : {size}" - ) - if not all([count > 0 for count in size]): - raise ValueError( - f"Mask `size` should be a postitive int. Found : {size}" - ) - return size - - -class PNGMaskFeatures(BaseModel): - # base64 encoded png bytes - png: str - - -class URIMaskFeatures(BaseModel): - instanceURI: str - colorRGB: Union[List[int], Tuple[int, int, int]] - - @field_validator("colorRGB") - def validate_color(cls, colorRGB): - # Does the dtype matter? Can it be a float? - if not isinstance(colorRGB, (tuple, list)): - raise ValueError( - f"Received color that is not a list or tuple. Found : {colorRGB}" - ) - elif len(colorRGB) != 3: - raise ValueError( - f"Must provide RGB values for segmentation colors. Found : {colorRGB}" - ) - elif not all([0 <= color <= 255 for color in colorRGB]): - raise ValueError( - f"All rgb colors must be between 0 and 255. Found : {colorRGB}" - ) - return colorRGB - - -class NDMask(NDBaseTool): - ontology_type: Literal["superpixel"] = "superpixel" - mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field( - json_schema_extra={"determinant": True} - ) - - -# A union with custom construction logic to improve error messages -class NDTool( - SpecialUnion, - Type[ # type: ignore - Union[ - NDMask, - NDTextEntity, - NDPoint, - NDRectangle, - NDPolyline, - NDPolygon, - ] - ], -): ... - - -class NDAnnotation( - SpecialUnion, - Type[Union[NDTool, NDClassification]], # type: ignore -): - @classmethod - def build(cls: Any, data) -> "NDBase": - if not isinstance(data, dict): - raise ValueError("value must be dict") - errors = [] - for cl in cls.get_union_types(): - try: - return cl(**data) - except KeyError as e: - errors.append(f"{cl.__name__}: {e}") - - raise ValueError( - "Unable to construct any annotation.\n{}".format("\n".join(errors)) - ) - - @classmethod - def schema(cls): - data = {"definitions": {}} - for type_ in cls.get_union_types(): - schema_ = type_.schema() - data["definitions"].update(schema_.pop("definitions")) - data[type_.__name__] = schema_ - return data diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py index 6f8aebc58..dfc87c8a4 100644 --- a/libs/labelbox/src/labelbox/schema/enums.py +++ b/libs/labelbox/src/labelbox/schema/enums.py @@ -1,31 +1,6 @@ from enum import Enum -class BulkImportRequestState(Enum): - """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). - - If you are not usinig MEA continue using BulkImportRequest. - AnnotationImports are in beta and will change soon. - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - State - - Description - * - RUNNING - - Indicates that the import job is not done yet. - * - FAILED - - Indicates the import job failed. Check `BulkImportRequest.errors` for more information - * - FINISHED - - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information - """ - - RUNNING = "RUNNING" - FAILED = "FAILED" - FINISHED = "FINISHED" - - class AnnotationImportState(Enum): """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). 
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f8876f7c4..f2de4db5e 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -5,36 +5,29 @@ import warnings from collections import namedtuple from datetime import datetime, timezone -from pathlib import Path from typing import ( TYPE_CHECKING, Any, Dict, - Iterable, List, Optional, Tuple, - TypeVar, Union, overload, ) -from urllib.parse import urlparse from labelbox.schema.labeling_service import ( LabelingService, LabelingServiceStatus, ) from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -import requests -from labelbox import parser from labelbox import utils from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, - ResourceConflict, ResourceNotFoundError, ) from labelbox.orm import query @@ -46,7 +39,6 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, - validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -63,7 +55,6 @@ from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, - OntologyKind, UploadType, ) from labelbox.schema.project_overview import ( @@ -72,7 +63,7 @@ ) if TYPE_CHECKING: - from labelbox import BulkImportRequest + pass DataRowPriority = int @@ -579,7 +570,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - f"This function has only been tested to work with the Editor front end. Found %s", + "This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -814,7 +805,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - f"Batch exceeds max size, break into smaller batches" + "Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1088,8 +1079,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Batch was not created successfully: " - + json.dumps(task.errors) + "Batch was not created successfully: " + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) @@ -1436,7 +1426,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Priority was not updated successfully: " + "Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1488,33 +1478,6 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] - def bulk_import_requests(self) -> PaginatedCollection: - """Returns bulk import request objects which are used in model-assisted labeling. - These are returned with the oldest first, and most recent last. - """ - - id_param = "project_id" - query_str = """query ListAllImportRequestsPyApi($%s: ID!) 
{ - bulkImportRequests ( - where: { projectId: $%s } - skip: %%d - first: %%d - ) { - %s - } - }""" % ( - id_param, - id_param, - query.results_query_part(Entity.BulkImportRequest), - ) - return PaginatedCollection( - self.client, - query_str, - {id_param: str(self.uid)}, - ["bulkImportRequests"], - Entity.BulkImportRequest, - ) - def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1629,7 +1592,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Data rows were not moved successfully: " + "Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1639,77 +1602,6 @@ def _wait_for_task(self, task_id: str) -> Task: return task - def upload_annotations( - self, - name: str, - annotations: Union[str, Path, Iterable[Dict]], - validate: bool = False, - ) -> "BulkImportRequest": # type: ignore - """Uploads annotations to a new Editor project. - - Args: - name (str): name of the BulkImportRequest job - annotations (str or Path or Iterable): - url that is publicly accessible by Labelbox containing an - ndjson file - OR local path to an ndjson file - OR iterable of annotation rows - validate (bool): - Whether or not to validate the payload before uploading. - Returns: - BulkImportRequest - """ - - if isinstance(annotations, str) or isinstance(annotations, Path): - - def _is_url_valid(url: Union[str, Path]) -> bool: - """Verifies that the given string is a valid url. - - Args: - url: string to be checked - Returns: - True if the given url is valid otherwise False - - """ - if isinstance(url, Path): - return False - parsed = urlparse(url) - return bool(parsed.scheme) and bool(parsed.netloc) - - if _is_url_valid(annotations): - return Entity.BulkImportRequest.create_from_url( - client=self.client, - project_id=self.uid, - name=name, - url=str(annotations), - validate=validate, - ) - else: - path = Path(annotations) - if not path.exists(): - raise FileNotFoundError( - f"{annotations} is not a valid url nor existing local file" - ) - return Entity.BulkImportRequest.create_from_local_file( - client=self.client, - project_id=self.uid, - name=name, - file=path, - validate_file=validate, - ) - elif isinstance(annotations, Iterable): - return Entity.BulkImportRequest.create_from_objects( - client=self.client, - project_id=self.uid, - name=name, - predictions=annotations, # type: ignore - validate=validate, - ) - else: - raise ValueError( - f"Invalid annotations given of type: {type(annotations)}" - ) - def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py deleted file mode 100644 index 9abae1422..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ /dev/null @@ -1,258 +0,0 @@ -from unittest.mock import patch -import uuid -from labelbox import parser, Project -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -import pytest -import random -from labelbox.data.annotation_types.annotation import ObjectAnnotation -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - ClassificationAnnotation, - ClassificationAnswer, - Radio, -) -from labelbox.data.annotation_types.data.video import VideoData -from 
labelbox.data.annotation_types.geometry.point import Point -from labelbox.data.annotation_types.geometry.rectangle import ( - Rectangle, - RectangleUnit, -) -from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.data.text import TextData -from labelbox.data.annotation_types.ner import ( - DocumentEntity, - DocumentTextSelection, -) -from labelbox.data.annotation_types.video import VideoObjectAnnotation - -from labelbox.data.serialization import NDJsonConverter -from labelbox.exceptions import MALValidationError, UuidError -from labelbox.schema.bulk_import_request import BulkImportRequest -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.annotation_import import LabelImport, MALPredictionImport -from labelbox.schema.media_type import MediaType - -""" -- Here we only want to check that the uploads are calling the validation -- Then with unit tests we can check the types of errors raised -""" -# TODO: remove library once bulk import requests are removed - - -@pytest.mark.order(1) -def test_create_from_url(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_file(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - with pytest.raises(MALValidationError): - module_project.upload_annotations( - name=name, annotations=url, validate=True - ) - # Schema ids shouldn't match - - -def test_create_from_objects( - module_project: Project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_create_from_label_objects( - module_project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - labels = list(NDJsonConverter.deserialize(predictions)) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=labels - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - normalized_predictions = list(NDJsonConverter.serialize(labels)) - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, normalized_predictions - ) - - -def test_create_from_local_file( - tmp_path, predictions, module_project, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - file_name = f"{name}.ndjson" - file_path = tmp_path / 
file_name - with file_path.open("w") as f: - parser.dump(predictions, f) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=str(file_path), validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_get(client, module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - bulk_import_request = BulkImportRequest.from_name( - client, project_id=module_project.uid, name=name - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_ndjson(tmp_path, module_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - -def test_validate_ndjson_uuid(tmp_path, module_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - uid = str(uuid.uuid4()) - repeat_uuid[0]["uuid"] = uid - repeat_uuid[1]["uuid"] = uid - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - -@pytest.mark.skip( - "Slow test and uses a deprecated api endpoint for annotation imports" -) -def test_wait_till_done(rectangle_inference, project): - name = str(uuid.uuid4()) - url = project.client.upload_data( - content=parser.dumps(rectangle_inference), sign=True - ) - bulk_import_request = project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert len(bulk_import_request.inputs) == 1 - bulk_import_request.wait_until_done() - assert bulk_import_request.state == BulkImportRequestState.FINISHED - - # Check that the status files are being returned as expected - assert len(bulk_import_request.errors) == 0 - assert len(bulk_import_request.inputs) == 1 - assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] - assert len(bulk_import_request.statuses) == 1 - assert bulk_import_request.statuses[0]["status"] == "SUCCESS" - assert ( - bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] - ) - - -def test_project_bulk_import_requests(module_project, predictions): - result = module_project.bulk_import_requests() - assert len(list(result)) == 0 - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - 
name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - result = module_project.bulk_import_requests() - assert len(list(result)) == 3 - - -def test_delete(module_project, predictions): - name = str(uuid.uuid4()) - - bulk_import_requests = module_project.bulk_import_requests() - [ - bulk_import_request.delete() - for bulk_import_request in bulk_import_requests - ] - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 1 - - bulk_import_request.delete() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index a0df559fc..9e8963a26 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,24 +1,8 @@ -from labelbox.schema.media_type import MediaType -from labelbox.schema.project import Project import pytest - -from labelbox import parser -from pytest_cases import parametrize, fixture_ref +from pytest_cases import fixture_ref, parametrize from labelbox.exceptions import MALValidationError -from labelbox.schema.bulk_import_request import ( - NDChecklist, - NDClassification, - NDMask, - NDPolygon, - NDPolyline, - NDRadio, - NDRectangle, - NDText, - NDTextEntity, - NDTool, - _validate_ndjson, -) +from labelbox.schema.media_type import MediaType """ - These NDlabels are apart of bulkImportReqeust and should be removed once bulk import request is removed @@ -191,39 +175,6 @@ def test_missing_feature_schema(module_project, rectangle_inference): _validate_ndjson([pred], module_project) -def test_validate_ndjson(tmp_path, configured_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - configured_project.upload_annotations( - name="name", annotations=str(file_path), validate=True - ) - - -def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - repeat_uuid[0]["uuid"] = "test_uuid" - repeat_uuid[1]["uuid"] = "test_uuid" - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - @pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) def test_video_upload(video_checklist_inference, configured_project): pred = video_checklist_inference[0].copy() diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json deleted file mode 100644 index 4de15e217..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json +++ /dev/null @@ -1,54 
+0,0 @@ -[ - { - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "confidence": 0.8, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" - }, - { - "answer": [ - { - "schemaId": "ckrb1sfl8099e0y919v260awv", - "confidence": 0.82, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } - ], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json deleted file mode 100644 index 83a95e5bf..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json +++ /dev/null @@ -1,25 +0,0 @@ -[{ - "location": { - "start": 67, - "end": 128 - }, - "messageId": "some-message-id", - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] -}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 91563b8ae..75fe36e44 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,16 +8,17 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - } + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + }, + "classifications": [] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -28,20 +29,17 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + }, + "classifications": [] }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -50,762 +48,39 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 
935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 
1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 + "x": 10.0, + "y": 20.0 }, { - "x": 1119, - "y": 934 + "x": 15.0, + "y": 20.0 }, { - "x": 1118, - "y": 935 + "x": 20.0, + "y": 25.0 }, { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json deleted file mode 100644 index 591e40cf6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json +++ /dev/null @@ -1,823 +0,0 @@ -[ - { - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "schemaId": "ckrazcueb16og0z6609jj7y3y", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.851, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - { - "uuid": 
"751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "schemaId": "ckrazcuec16ok0z66f956apb7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.834, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } - }, - { - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "schemaId": "ckrazcuec16oi0z66dzrd8pfl", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.986, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "polygon": [ - { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - 
"x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 - } - ] - }, - { - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "schemaId": 
"ckrazcuec16om0z66bhhh4tp7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "point": { - "x": 2122, - "y": 1457 - } - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 82be4cdab..466a03594 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,826 +1,86 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "box a", + "name": "ckrazcueb16og0z6609jj7y3y", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "confidence": 0.854, + "classifications": [], + "confidence": 0.851, "customMetrics": [ { "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.7 + "value": 0.4 } - ] + ], + "bbox": { + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "mask a", + "name": "ckrazcuec16ok0z66f956apb7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - }, - "confidence": 0.685, + "classifications": [], + "confidence": 0.834, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 - }, - { - "name": "customMetric2", - "value": 0.9 + "value": 0.3 } - ] + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + } }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "polygon a", + "name": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.71, + "confidence": 0.986, "customMetrics": [ { "name": "customMetric1", - "value": 0.1 + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 
929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 + "x": 10.0, + "y": 20.0 }, { - "x": 1099, - "y": 911 + "x": 15.0, + "y": 20.0 }, { - "x": 1100, - "y": 911 + "x": 20.0, + "y": 25.0 }, { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, 
- "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "point a", + "name": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.77, - "customMetrics": [ - { - "name": "customMetric2", - "value": 1.2 - } - ], "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json deleted file mode 100644 index 31be5a4c7..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "aggregation": "ARITHMETIC_MEAN", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "metricValue": 0.1 - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json deleted file mode 100644 index f4b4894f6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json +++ /dev/null @@ -1,155 +0,0 @@ -[{ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 4, - "unit": "POINTS", - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 162.73, - "left": 32.45, - "height": 388.16999999999996, - "width": 101.66000000000001 - } -}, { - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "bbox": { - "top": 223.26, - "left": 251.42, - "height": 457.03999999999996, - "width": 186.78 - } -}, { - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", 
- "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 6, - "unit": "POINTS", - "confidence": 0.99, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 32.52, - "left": 218.17, - "height": 231.73, - "width": 110.56000000000003 - } -}, { - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "confidence": 0.89, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 117.39, - "left": 4.25, - "height": 456.9200000000001, - "width": 164.83 - } -}, { - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 8, - "unit": "POINTS", - "bbox": { - "top": 82.13, - "left": 217.28, - "height": 279.76, - "width": 82.43000000000004 - } -}, { - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 3, - "unit": "POINTS", - "bbox": { - "top": 298.12, - "left": 83.34, - "height": 203.83000000000004, - "width": 0.37999999999999545 - } -}, -{ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "named_entity", - "classifications": [], - "textSelections": [ - { - "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - "tokenIds": [ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c" - ], - "page": 1 - } - ] -} -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json deleted file mode 100644 index d6a9eecbd..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json +++ /dev/null @@ -1,36 +0,0 @@ -[ - { - "line": [ - { - "x": 2534.353, - "y": 249.471 - }, - { - "x": 2429.492, - "y": 182.092 - }, - { - "x": 2294.322, - "y": 221.962 - } - ], - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-line", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.58, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json deleted file mode 100644 index 1f26d8dc8..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "location": { - "start": 67, - "end": 128 - }, - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": 
"05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json deleted file mode 100644 index 11e0753d9..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json +++ /dev/null @@ -1,166 +0,0 @@ -[{ - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" - }, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{ - "start": 30, - "end": 35 - }, { - "start": 50, - "end": 51 - }] -}, { - "answer": [{ - "schemaId": "ckrb1sfl8099e0y919v260awv" - }], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{ - "start": 0, - "end": 5 - }] -}, { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" -}, { - "classifications": [], - "schemaId": - "cl5islwg200gfci6g0oitaypu", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [{ - "keyframes": [{ - "frame": 1, - "line": [{ - "x": 10.0, - "y": 10.0 - }, { - "x": 100.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }, { - "frame": 5, - "line": [{ - "x": 15.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 8, - "line": [{ - "x": 100.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [{ - "keyframes": [{ - "frame": 1, - "point": { - "x": 10.0, - "y": 10.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 5, - "point": { - "x": 50.0, - "y": 50.0 - }, - "classifications": [] - }, { - "frame": 10, - "point": { - "x": 10.0, - "y": 50.0 - }, - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5iw0roz00lwci6g5jni62vs", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [{ - "keyframes": [{ - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0 - }, - "classifications": [] - }, { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0 - }, - "classifications": [] - }] - }] -}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 0bc3c8924..59f568c75 100644 --- 
a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py
@@ -37,13 +37,6 @@ def test_serialization_min():
     res.pop("uuid")
     assert res == expected
 
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    for i, annotation in enumerate(res.annotations):
-        annotation.extra.pop("uuid")
-        assert annotation.value == label.annotations[i].value
-        assert annotation.name == label.annotations[i].name
-
 
 def test_serialization_with_classification():
     label = Label(
@@ -134,12 +127,6 @@ def test_serialization_with_classification():
     res.pop("uuid")
     assert res == expected
 
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    assert label.model_dump(exclude_none=True) == label.model_dump(
-        exclude_none=True
-    )
-
 
 def test_serialization_with_classification_double_nested():
     label = Label(
@@ -233,13 +220,6 @@ def test_serialization_with_classification_double_nested():
     res.pop("uuid")
     assert res == expected
 
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    res.annotations[0].extra.pop("uuid")
-    assert label.model_dump(exclude_none=True) == label.model_dump(
-        exclude_none=True
-    )
-
 
 def test_serialization_with_classification_double_nested_2():
     label = Label(
@@ -330,9 +310,3 @@ def test_serialization_with_classification_double_nested_2():
     res = next(serialized)
     res.pop("uuid")
     assert res == expected
-
-    deserialized = NDJsonConverter.deserialize([res])
-    res = next(deserialized)
-    assert label.model_dump(exclude_none=True) == label.model_dump(
-        exclude_none=True
-    )
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
index 8dcb17f0b..82adce99c 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
@@ -1,15 +1,73 @@
 import json
+from labelbox.data.annotation_types.classification.classification import (
+    Checklist,
+    Radio,
+    Text,
+)
+from labelbox.data.annotation_types.data.generic_data_row_data import (
+    GenericDataRowData,
+)
 
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
+from labelbox.types import (
+    Label,
+    ClassificationAnnotation,
+    ClassificationAnswer,
+)
+from labelbox.data.mixins import CustomMetric
+
 
 def test_classification():
     with open(
         "tests/data/assets/ndjson/classification_import.json", "r"
     ) as file:
         data = json.load(file)
-    res = list(NDJsonConverter.deserialize(data))
-    res = list(NDJsonConverter.serialize(res))
+
+    label = Label(
+        data=GenericDataRowData(
+            uid="ckrb1sf1i1g7i0ybcdc6oc8ct",
+        ),
+        annotations=[
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfjx099a0y914hl319ie",
+                extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"},
+                value=Radio(
+                    answer=ClassificationAnswer(
+                        custom_metrics=[
+                            CustomMetric(name="customMetric1", value=0.5),
+                            CustomMetric(name="customMetric2", value=0.3),
+                        ],
+                        confidence=0.8,
+                        feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb",
+                    ),
+                ),
+            ),
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
+                extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"},
+                value=Checklist(
+                    answer=[
+                        ClassificationAnswer(
+                            custom_metrics=[
+                                CustomMetric(name="customMetric1", value=0.5),
+                                CustomMetric(name="customMetric2", value=0.3),
+                            ],
+                            confidence=0.82,
+                            feature_schema_id="ckrb1sfl8099e0y919v260awv",
+                        )
+                    ],
+                ),
+            ),
+
ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data @@ -18,6 +76,48 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + label = Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + name="classification a", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="choice 1", + ), + ), + ), + ClassificationAnnotation( + name="classification b", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.945, + name="choice 2", + ) + ], + ), + ), + ClassificationAnnotation( + name="classification c", + extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index f7da9181b..561f9ce86 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,8 +1,12 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -99,25 +103,62 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - deserialized_label = list(NDJsonConverter().deserialize(ndjson)) - deserialized_label[0].annotations[0].extra.pop("uuid") - assert deserialized_label[0].model_dump(exclude_none=True) == label[ - 0 - ].model_dump(exclude_none=True) +def test_conversation_entity_import(): + with open( + "tests/data/assets/ndjson/conversation_entity_import.json", "r" + ) as file: + data = json.load(file) -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/conversation_entity_import.json", + label = lb_types.Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) + assert res == data + + +def test_conversation_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - ], -) 
-def test_conversation_entity_import(filename: str):
-    with open(filename, "r") as file:
+        "r",
+    ) as file:
         data = json.load(file)
-    res = list(NDJsonConverter.deserialize(data))
-    res = list(NDJsonConverter.serialize(res))
+    label = lb_types.Label(
+        uid=None,
+        data=GenericDataRowData(
+            uid="cl6xnv9h61fv0085yhtoq06ht",
+        ),
+        annotations=[
+            lb_types.ObjectAnnotation(
+                name="some-text-entity",
+                feature_schema_id="cl6xnuwt95lqq07330tbb3mfd",
+                extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"},
+                value=lb_types.ConversationEntity(
+                    start=67, end=128, extra={}, message_id="some-message-id"
+                ),
+            )
+        ],
+    )
+
+    res = list(NDJsonConverter.serialize([label]))
     assert res == data
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py
index 333c00250..999e1bda5 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py
@@ -1,67 +1,29 @@
-from copy import copy
-import pytest
 import labelbox.types as lb_types
 from labelbox.data.serialization import NDJsonConverter
-from labelbox.data.serialization.ndjson.objects import (
-    NDDicomSegments,
-    NDDicomSegment,
-    NDDicomLine,
-)
-
-"""
-Data gen prompt test data
-"""
-
-prompt_text_annotation = lb_types.PromptClassificationAnnotation(
-    feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
-    name="test",
-    value=lb_types.PromptText(
-        answer="the answer to the text questions right here"
-    ),
-)
-
-prompt_text_ndjson = {
-    "answer": "the answer to the text questions right here",
-    "name": "test",
-    "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
-    "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
-}
-
-data_gen_label = lb_types.Label(
-    data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
-    annotations=[prompt_text_annotation],
-)
-
-"""
-Prompt annotation test
-"""
 
 
 def test_serialize_label():
-    serialized_label = next(NDJsonConverter().serialize([data_gen_label]))
-    # Remove uuid field since this is a random value that can not be specified also meant for relationships
-    del serialized_label["uuid"]
-    assert serialized_label == prompt_text_ndjson
-
-
-def test_deserialize_label():
-    deserialized_label = next(
-        NDJsonConverter().deserialize([prompt_text_ndjson])
+    prompt_text_annotation = lb_types.PromptClassificationAnnotation(
+        feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
+        name="test",
+        extra={"uuid": "test"},
+        value=lb_types.PromptText(
+            answer="the answer to the text questions right here"
+        ),
     )
-    if hasattr(deserialized_label.annotations[0], "extra"):
-        # Extra fields are added to deserialized label by default need removed to match
-        deserialized_label.annotations[0].extra = {}
-    assert deserialized_label.model_dump(
-        exclude_none=True
-    ) == data_gen_label.model_dump(exclude_none=True)
+
+    prompt_text_ndjson = {
+        "answer": "the answer to the text questions right here",
+        "name": "test",
+        "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
+        "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+        "uuid": "test",
+    }
+
+    data_gen_label = lb_types.Label(
+        data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+        annotations=[prompt_text_annotation],
+    )
+
+    serialized_label = next(NDJsonConverter().serialize([data_gen_label]))
 
-def test_serialize_deserialize_label():
-    serialized = list(NDJsonConverter.serialize([data_gen_label]))
-    deserialized = next(NDJsonConverter.deserialize(serialized))
-    if hasattr(deserialized.annotations[0], "extra"):
-        # Extra fields are added to deserialized label by default need removed to
match - deserialized.annotations[0].extra = {} - assert deserialized.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + assert serialized_label == prompt_text_ndjson diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 633214367..762891aa2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,6 +1,5 @@ from copy import copy import pytest -import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -181,28 +180,3 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson - - -@pytest.mark.parametrize("label, ndjson", labels_ndjsons) -def test_deserialize_label(label, ndjson): - deserialized_label = next(NDJsonConverter().deserialize([ndjson])) - if hasattr(deserialized_label.annotations[0], "extra"): - deserialized_label.annotations[0].extra = {} - for i, annotation in enumerate(deserialized_label.annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value - - -@pytest.mark.parametrize("label", labels) -def test_serialize_deserialize_label(label): - serialized = list(NDJsonConverter.serialize([label])) - deserialized = list(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized[0].annotations[0], "extra"): - deserialized[0].annotations[0].extra = {} - for i, annotation in enumerate(deserialized[0].annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index 5fe6a9789..a0897ad9f 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,6 +1,19 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + RectangleUnit, + Point, + DocumentRectangle, + DocumentEntity, + DocumentTextSelection, +) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -53,10 +66,144 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + 
unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.89, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_with_name_only(): @@ -65,26 +212,135 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id=None, + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ 
+ "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.74, + name="boxy", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson - - -def test_pdf_bbox_deserialize(): - deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) - deserialized[0].annotations[0].extra = {} - assert ( - deserialized[0].annotations[0].value - == bbox_labels[0].annotations[0].value - ) - assert ( - deserialized[0].annotations[0].name - == bbox_labels[0].annotations[0].name - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 4adcd9935..1ab678cde 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,8 +9,6 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=VideoData( uid="cklr9mr4m5iao0rb6cvxu4qbn", - file_path=None, - frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -22,6 +20,7 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", + "uuid": 
"b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, value=Rectangle( extra={}, @@ -588,31 +587,4 @@ def test_serialize_video_objects(): serialized_labels = NDJsonConverter.serialize([label]) label = next(serialized_labels) - manual_label = video_serialized_bbox_label() - - for key in label.keys(): - # ignore uuid because we randomize if there was none - if key != "uuid": - assert label[key] == manual_label[key] - - assert len(label["segments"]) == 2 - assert len(label["segments"][0]["keyframes"]) == 2 - assert len(label["segments"][1]["keyframes"]) == 4 - - # #converts back only the keyframes. should be the sum of all prev segments - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - assert len(label.annotations) == 6 - - -def test_confidence_is_ignored(): - label = video_bbox_label() - serialized_labels = NDJsonConverter.serialize([label]) - label = next(serialized_labels) - label["confidence"] = 0.453 - label["segments"][0]["confidence"] = 0.453 - - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - for annotation in label.annotations: - assert annotation.confidence is None + assert label == video_serialized_bbox_label() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 84c017497..349be13a8 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -34,16 +34,6 @@ def test_serialization(): assert res["answer"] == "text_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "text_answer" - assert annotation_value.confidence == 0.5 - def test_nested_serialization(): label = Label( @@ -102,19 +92,3 @@ def test_nested_serialization(): assert sub_classification["name"] == "nested answer" assert sub_classification["answer"] == "nested answer" assert sub_classification["confidence"] == 0.7 - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - answer = annotation.value.answer[0] - assert answer.confidence == 0.9 - assert answer.name == "first_answer" - - classification_answer = answer.classifications[0].value.answer - assert classification_answer.confidence == 0.8 - assert classification_answer.name == "first_sub_radio_answer" - - sub_classification_answer = classification_answer.classifications[0].value - assert type(sub_classification_answer) is Text - assert sub_classification_answer.answer == "nested answer" - assert sub_classification_answer.confidence == 0.7 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py index 2b3fa7f8c..d104a691e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py @@ -1,73 +1,74 @@ -import json -import pytest - -from labelbox.data.serialization.ndjson.classification import NDRadio - +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import NDLine 
-
-
-def round_dict(data):
-    if isinstance(data, dict):
-        for key in data:
-            if isinstance(data[key], float):
-                data[key] = int(data[key])
-            elif isinstance(data[key], dict):
-                data[key] = round_dict(data[key])
-            elif isinstance(data[key], (list, tuple)):
-                data[key] = [round_dict(r) for r in data[key]]
+from labelbox.types import (
+    Label,
+    ClassificationAnnotation,
+    Radio,
+    ClassificationAnswer,
+)
 
-    return data
 
+def test_generic_data_row_global_key_included():
+    expected = [
+        {
+            "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"},
+            "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+            "schemaId": "ckrb1sfjx099a0y914hl319ie",
+            "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673",
+        }
+    ]
 
-@pytest.mark.parametrize(
-    "filename",
-    [
-        "tests/data/assets/ndjson/classification_import_global_key.json",
-        "tests/data/assets/ndjson/metric_import_global_key.json",
-        "tests/data/assets/ndjson/polyline_import_global_key.json",
-        "tests/data/assets/ndjson/text_entity_import_global_key.json",
-        "tests/data/assets/ndjson/conversation_entity_import_global_key.json",
-    ],
-)
-def test_many_types(filename: str):
-    with open(filename, "r") as f:
-        data = json.load(f)
-        res = list(NDJsonConverter.deserialize(data))
-        res = list(NDJsonConverter.serialize(res))
-        assert res == data
-    f.close()
+    label = Label(
+        data=GenericDataRowData(
+            global_key="ckrb1sf1i1g7i0ybcdc6oc8ct",
+        ),
+        annotations=[
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfjx099a0y914hl319ie",
+                extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"},
+                value=Radio(
+                    answer=ClassificationAnswer(
+                        feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb",
+                    ),
+                ),
+            )
+        ],
+    )
 
+    res = list(NDJsonConverter.serialize([label]))
 
-def test_image():
-    with open(
-        "tests/data/assets/ndjson/image_import_global_key.json", "r"
-    ) as f:
-        data = json.load(f)
-        res = list(NDJsonConverter.deserialize(data))
-        res = list(NDJsonConverter.serialize(res))
-        for r in res:
-            r.pop("classifications", None)
-        assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
-    f.close()
+    assert res == expected
 
-def test_pdf():
-    with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f:
-        data = json.load(f)
-        res = list(NDJsonConverter.deserialize(data))
-        res = list(NDJsonConverter.serialize(res))
-        assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
-    f.close()
+def test_dict_data_row_global_key_included():
+    expected = [
+        {
+            "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"},
+            "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+            "schemaId": "ckrb1sfjx099a0y914hl319ie",
+            "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673",
+        }
+    ]
 
+    label = Label(
+        data={
+            "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct",
+        },
+        annotations=[
+            ClassificationAnnotation(
+                feature_schema_id="ckrb1sfjx099a0y914hl319ie",
+                extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"},
+                value=Radio(
+                    answer=ClassificationAnswer(
+                        feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb",
+                    ),
+                ),
+            )
+        ],
+    )
 
-def test_video():
-    with open(
-        "tests/data/assets/ndjson/video_import_global_key.json", "r"
-    ) as f:
-        data = json.load(f)
+    res = list(NDJsonConverter.serialize([label]))
 
-    res = list(NDJsonConverter.deserialize(data))
-    res = list(NDJsonConverter.serialize(res))
-    assert res == [data[2], data[0], data[1], data[3], data[4], data[5]]
-    f.close()
+    assert res == expected
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py
index 1729e1f46..d67acb9c3 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,4 +1,8 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -10,6 +14,7 @@ ImageData, MaskData, ) +from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -29,12 +34,74 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_image_with_name_only(): @@ -43,11 +110,74 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + name="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + name="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + 
url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + name="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + name="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_mask(): @@ -57,10 +187,11 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], + "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -68,16 +199,54 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [255, 0, 0], + "colorRGB": (255, 0, 0), }, + "classifications": [], }, ] - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) + mask_numpy = mask_numpy.astype(np.uint8) + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.8, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Mask( + mask=MaskData(arr=mask_numpy), + color=(1, 1, 1), + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + extra={}, + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=(255, 0, 0), + ), + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + + assert res == data def test_mask_from_arr(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 45c5c67bf..40e098405 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,38 +1,166 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from 
labelbox.data.annotation_types.metrics.confusion_matrix import ( + ConfusionMatrixMetric, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ScalarMetric, + ScalarMetricAggregation, + ConfusionMatrixAggregation, +) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert reserialized == data + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + assert res == data def test_custom_scalar_metric(): - with open( - "tests/data/assets/ndjson/custom_scalar_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: 0.1, 0.2: 0.5}, + "metricName": "custom_iou", + "aggregation": "SUM", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value=0.1, + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value={"0.1": 0.1, "0.2": 0.5}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert res == data def test_custom_confusion_matrix_metric(): - with open( - "tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (1, 1, 2, 3), + "metricName": "50%_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (0, 1, 2, 5), + "metricName": "50%_iou", + "featureName": "sample_class", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 
4, 3)}, + "metricName": "50%_iou", + "aggregation": "CONFUSION_MATRIX", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ConfusionMatrixMetric( + value=(1, 1, 2, 3), + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value=(0, 1, 2, 5), + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert data == res diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 69594ff73..202f793fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,32 +1,125 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest from labelbox.data.serialization import NDJsonConverter +from labelbox.types import ( + Label, + MessageEvaluationTaskAnnotation, + MessageSingleSelectionTask, + MessageMultiSelectionTask, + MessageInfo, + OrderedMessageInfo, + MessageRankingTask, +) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - deserialized = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(deserialized)) + labels = [ + Label( + data=GenericDataRowData( + uid="cnjencjencjfencvj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="single-selection", + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + value=MessageSingleSelectionTask( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + parent_message_id="clxfznjb800073b6v43ppx9ca", + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cfcerfvergerfefj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="multi-selection", + extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, + value=MessageMultiSelectionTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + selected_messages=[ + MessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + ) + ], + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=2, + ), + ], + ), + ) + ], + ), + ] - assert data == reserialized + res = 
list(NDJsonConverter.serialize(labels)) + assert res == data -def test_mesage_ranking_task_wrong_order_serialization(): - with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: - data = json.load(file) - - some_ranking_task = next( - task - for task in data - if task["messageEvaluationTask"]["format"] == "message-ranking" - ) - some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ - "order" - ] = 3 +def test_mesage_ranking_task_wrong_order_serialization(): with pytest.raises(ValueError): - list(NDJsonConverter.deserialize([some_ranking_task])) + ( + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={ + "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" + }, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + ], + ), + ) + ], + ), + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py deleted file mode 100644 index 790bd87b3..000000000 --- a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from labelbox.data.serialization.ndjson.label import NDLabel -from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle -import pytest - - -def test_bad_annotation_input(): - data = [{"test": 3}] - with pytest.raises(ValueError): - NDLabel(**{"annotations": data}) - - -def test_correct_annotation_input(): - with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: - data = json.load(f) - assert isinstance( - NDLabel(**{"annotations": [data[0]]}).annotations[0], - NDDocumentRectangle, - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index e0f0df0e6..3633c9cbe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,13 +1,135 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Rectangle, + Point, + ClassificationAnnotation, + Radio, + ClassificationAnswer, + Text, + Checklist, +) def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + 
name="customMetric2", value=0.3 + ), + ], + confidence=0.34, + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "5d03213e-4408-456c-9eca-cf0723202961", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.894, + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={}, + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -16,6 +138,112 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="box a", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.811, + name="first answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box b", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification b", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.815, + name="second answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification c", + value=Checklist( + answer=[ + ClassificationAnswer( + name="third answer", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), 
+ end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="a string", + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index 97d48a14e..cd11d97fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,18 +1,76 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ObjectAnnotation, Point, Line, Label -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/polyline_without_confidence_import.json", - "tests/data/assets/ndjson/polyline_import.json", - ], -) -def test_polyline_import(filename: str): - with open(filename, "r") as file: +def test_polyline_import_with_confidence(): + with open( + "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" + ) as file: + data = json.load(file) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data + + +def test_polyline_import_without_confidence(): + with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.58, + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index bd80f9267..4458e335c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,4 +1,3 @@ -import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -40,14 +39,6 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - for i, annotation in enumerate(res.annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def 
test_serialization_with_radio_classification(): label = Label( @@ -101,10 +92,3 @@ def test_serialization_with_radio_classification(): res = next(serialized) res.pop("uuid") assert res == expected - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - res.annotations[0].extra.pop("uuid") - assert res.annotations[0].model_dump( - exclude_none=True - ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 66630dbb5..0e42ab152 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,6 +1,10 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -8,8 +12,26 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="bbox", + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + }, + value=Rectangle( + start=Point(x=38.0, y=28.0), + end=Point(x=81.0, y=69.0), + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -39,8 +61,6 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, }, ) @@ -48,8 +68,9 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + + assert res == data def test_rectangle_mixed_start_end_points(): @@ -76,17 +97,13 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, - }, + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + assert res == data def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index f33719035..235b66957 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,16 +1,135 @@ import json -from uuid import uuid4 -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Point, + Rectangle, + RelationshipAnnotation, + Relationship, +) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = 
list(NDJsonConverter.deserialize(data)) + res = [ + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + extra={}, + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl-DIFFERENT", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + ] res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -44,29 +163,3 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] - - -def test_relationship_nonexistent_object(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - relationship_annotation = data[2] - source_uuid = relationship_annotation["relationship"]["source"] - target_uuid = str(uuid4()) - relationship_annotation["relationship"]["target"] = target_uuid - error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" - - with pytest.raises(ValueError, match=error_msg): - list(NDJsonConverter.deserialize(data)) - - -def test_relationship_duplicate_uuids(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - source, target = data[0], data[1] - target["uuid"] = source["uuid"] - error_msg = f"UUID '{source['uuid']}' is not unique" - - with pytest.raises(AssertionError, match=error_msg): - list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index d5e81c51a..21db389cb 100644 --- 
a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,7 +1,5 @@ from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( - ClassificationAnswer, - Radio, Text, ) from labelbox.data.annotation_types.data.text import TextData @@ -34,11 +32,3 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index 3e856f001..fb93f15d4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,21 +1,68 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, TextEntity + + +def test_text_entity_import(): + with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: + data = json.load(file) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/text_entity_import.json", +def test_text_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - ], -) -def test_text_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index c7a6535c4..4fba5c2ca 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,10 +1,10 @@ import json -from labelbox.client import Client from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, Radio, + Text, 
) from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.geometry.line import Line @@ -13,8 +13,10 @@ from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.video import VideoObjectAnnotation -from labelbox import parser +from labelbox.data.annotation_types.video import ( + VideoClassificationAnnotation, + VideoObjectAnnotation, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter from operator import itemgetter @@ -24,15 +26,275 @@ def test_video(): with open("tests/data/assets/ndjson/video_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=1, + ), + 
VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=5, + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + 
frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_name_only(): @@ -40,16 +302,274 @@ def test_video_name_only(): "tests/data/assets/ndjson/video_import_name_only.json", "r" ) as file: data = json.load(file) - - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=1, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], 
+ ), + frame=5, + ), + ClassificationAnnotation( + name="question 3", + extra={"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_classification_global_subclassifications(): @@ -67,7 +587,6 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( - name="checklist", answer=[ ClassificationAnswer( name="first_checklist_answer", @@ -94,7 +613,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = nested_checklist_annotation_ndjson = { + expected_second_annotation = { "name": "nested_checklist_question", "answer": [ { @@ -116,12 +635,6 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - assert annotation.name == label.annotations[i].name - def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -287,14 +800,6 @@ def 
test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_video_classification_point(): bbox_annotation = [ @@ -445,13 +950,6 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - def test_video_classification_frameline(): bbox_annotation = [ @@ -619,9 +1117,289 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value + +[ + { + "answer": "a value", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 5, "start": 0}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5islwg200gfci6g0oitaypu", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + { + "classifications": [], + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + } + ] + }, + ], + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + } + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + }, + { + "classifications": [], + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + }, + ] + }, + ], + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "segments": [ + { + "keyframes": [ + { + "bbox": { + "height": 100.0, + "left": 5.0, + "top": 10.0, + "width": 150.0, + }, + "classifications": [], + "frame": 1, + }, + { + "bbox": { + "height": 50.0, + "left": 5.0, + "top": 30.0, + "width": 150.0, + }, + "classifications": [], + "frame": 5, + 
}, + ] + }, + { + "keyframes": [ + { + "bbox": { + "height": 400.0, + "left": 200.0, + "top": 300.0, + "width": 150.0, + }, + "classifications": [], + "frame": 10, + } + ] + }, + ], + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + }, +] + +[ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{"start": 0, "end": 5}], + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "classifications": [], + "schemaId": "cl5islwg200gfci6g0oitaypu", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + { + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + } + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + "classifications": [], + } + ] + }, + { + "keyframes": [ + { + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + "classifications": [], + }, + { + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + "classifications": [], + }, + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0, + }, + "classifications": [], + }, + { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0, + }, + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0, + }, + "classifications": [], + } + ] + }, + ], + }, +] From 2c0c6773f8abdac0928a325d75f709892a92a13d Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 18 Sep 2024 09:22:02 -0700 Subject: [PATCH 05/22] Revert "Vb/merge 5.0.0 (#1826)" (#1827) --- libs/labelbox/src/labelbox/__init__.py | 1 + libs/labelbox/src/labelbox/orm/model.py | 1 + libs/labelbox/src/labelbox/schema/__init__.py | 21 +- .../labelbox/schema/bulk_import_request.py | 1004 +++++++++++++++++ libs/labelbox/src/labelbox/schema/enums.py | 25 + libs/labelbox/src/labelbox/schema/project.py | 120 +- .../test_bulk_import_request.py | 258 +++++ .../test_ndjson_validation.py | 53 +- .../classification_import_global_key.json | 54 + ...conversation_entity_import_global_key.json | 25 + .../data/assets/ndjson/image_import.json | 779 ++++++++++++- 
.../ndjson/image_import_global_key.json | 823 ++++++++++++++ .../assets/ndjson/image_import_name_only.json | 810 ++++++++++++- .../ndjson/metric_import_global_key.json | 10 + .../assets/ndjson/pdf_import_global_key.json | 155 +++ .../ndjson/polyline_import_global_key.json | 36 + .../ndjson/text_entity_import_global_key.json | 26 + .../ndjson/video_import_global_key.json | 166 +++ .../serialization/ndjson/test_checklist.py | 26 + .../ndjson/test_classification.py | 108 +- .../serialization/ndjson/test_conversation.py | 71 +- .../serialization/ndjson/test_data_gen.py | 80 +- .../data/serialization/ndjson/test_dicom.py | 26 + .../serialization/ndjson/test_document.py | 294 +---- .../ndjson/test_export_video_objects.py | 32 +- .../serialization/ndjson/test_free_text.py | 26 + .../serialization/ndjson/test_global_key.py | 125 +- .../data/serialization/ndjson/test_image.py | 203 +--- .../data/serialization/ndjson/test_metric.py | 170 +-- .../data/serialization/ndjson/test_mmc.py | 125 +- .../ndjson/test_ndlabel_subclass_matching.py | 19 + .../data/serialization/ndjson/test_nested.py | 236 +--- .../serialization/ndjson/test_polyline.py | 82 +- .../data/serialization/ndjson/test_radio.py | 16 + .../serialization/ndjson/test_rectangle.py | 43 +- .../serialization/ndjson/test_relationship.py | 151 +-- .../data/serialization/ndjson/test_text.py | 10 + .../serialization/ndjson/test_text_entity.py | 69 +- .../data/serialization/ndjson/test_video.py | 868 +------------- 39 files changed, 4767 insertions(+), 2380 deletions(-) create mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py create mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py create mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json create mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json create mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index f9b82b422..5b5ac1f67 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -6,6 +6,7 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig +from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( MALPredictionImport, MEAPredictionImport, diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py index 1f3ee1d86..84dcac774 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -386,6 +386,7 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] + BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: 
Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index e57c04a29..03327e0d1 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,28 +1,29 @@ -import labelbox.schema.annotation_import import labelbox.schema.asset_attachment -import labelbox.schema.batch +import labelbox.schema.bulk_import_request +import labelbox.schema.annotation_import import labelbox.schema.benchmark -import labelbox.schema.catalog import labelbox.schema.data_row -import labelbox.schema.data_row_metadata import labelbox.schema.dataset -import labelbox.schema.iam_integration -import labelbox.schema.identifiable -import labelbox.schema.identifiables import labelbox.schema.invite import labelbox.schema.label import labelbox.schema.labeling_frontend import labelbox.schema.labeling_service -import labelbox.schema.media_type import labelbox.schema.model import labelbox.schema.model_run import labelbox.schema.ontology -import labelbox.schema.ontology_kind import labelbox.schema.organization import labelbox.schema.project -import labelbox.schema.project_overview import labelbox.schema.review import labelbox.schema.role import labelbox.schema.task import labelbox.schema.user import labelbox.schema.webhook +import labelbox.schema.data_row_metadata +import labelbox.schema.batch +import labelbox.schema.iam_integration +import labelbox.schema.media_type +import labelbox.schema.identifiables +import labelbox.schema.identifiable +import labelbox.schema.catalog +import labelbox.schema.ontology_kind +import labelbox.schema.project_overview diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py new file mode 100644 index 000000000..8e11f3261 --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/bulk_import_request.py @@ -0,0 +1,1004 @@ +import json +import time +from uuid import UUID, uuid4 +import functools + +import logging +from pathlib import Path +from google.api_core import retry +from labelbox import parser +import requests +from pydantic import ( + ValidationError, + BaseModel, + Field, + field_validator, + model_validator, + ConfigDict, + StringConstraints, +) +from typing_extensions import Literal, Annotated +from typing import ( + Any, + List, + Optional, + BinaryIO, + Dict, + Iterable, + Tuple, + Union, + Type, + Set, + TYPE_CHECKING, +) + +from labelbox import exceptions as lb_exceptions +from labelbox import utils +from labelbox.orm import query +from labelbox.orm.db_object import DbObject +from labelbox.orm.model import Relationship +from labelbox.schema.enums import BulkImportRequestState +from labelbox.schema.serialization import serialize_labels +from labelbox.orm.model import Field as lb_Field + +if TYPE_CHECKING: + from labelbox import Project + from labelbox.types import Label + +NDJSON_MIME_TYPE = "application/x-ndjson" +logger = logging.getLogger(__name__) + +# TODO: Deprecate this library in place of labelimport and malprediction import library. 
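+# A rough sketch of the equivalent call with the newer import library
+# (assuming an existing `client`, `project_id`, and list of `labels`;
+# the names below are illustrative only, not part of this module):
+#
+#     from labelbox.schema.annotation_import import LabelImport
+#
+#     upload = LabelImport.create_from_objects(
+#         client=client, project_id=project_id,
+#         name="my-import", labels=labels,
+#     )
+#     upload.wait_until_done()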
+ + +def _determinants(parent_cls: Any) -> List[str]: + return [ + k + for k, v in parent_cls.model_fields.items() + if v.json_schema_extra and "determinant" in v.json_schema_extra + ] + + +def _make_file_name(project_id: str, name: str) -> str: + return f"{project_id}__{name}.ndjson" + + +# TODO(gszpak): move it to client.py +def _make_request_data( + project_id: str, name: str, content_length: int, file_name: str +) -> dict: + query_str = """mutation createBulkImportRequestFromFilePyApi( + $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) { + createBulkImportRequest(data: { + projectId: $projectId, + name: $name, + filePayload: { + file: $file, + contentLength: $contentLength + } + }) { + %s + } + } + """ % query.results_query_part(BulkImportRequest) + variables = { + "projectId": project_id, + "name": name, + "file": None, + "contentLength": content_length, + } + operations = json.dumps({"variables": variables, "query": query_str}) + + return { + "operations": operations, + "map": (None, json.dumps({file_name: ["variables.file"]})), + } + + +def _send_create_file_command( + client, + request_data: dict, + file_name: str, + file_data: Tuple[str, Union[bytes, BinaryIO], str], +) -> dict: + response = client.execute(data=request_data, files={file_name: file_data}) + + if not response.get("createBulkImportRequest", None): + raise lb_exceptions.LabelboxError( + "Failed to create BulkImportRequest, message: %s" + % response.get("errors", None) + or response.get("error", None) + ) + + return response + + +class BulkImportRequest(DbObject): + """Represents the import job when importing annotations. + + Attributes: + name (str) + state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) + input_file_url (str): URL to your web-hosted NDJSON file + error_file_url (str): NDJSON that contains error messages for failed annotations + status_file_url (str): NDJSON that contains status for each annotation + created_at (datetime): UTC timestamp for date BulkImportRequest was created + + project (Relationship): `ToOne` relationship to Project + created_by (Relationship): `ToOne` relationship to User + """ + + name = lb_Field.String("name") + state = lb_Field.Enum(BulkImportRequestState, "state") + input_file_url = lb_Field.String("input_file_url") + error_file_url = lb_Field.String("error_file_url") + status_file_url = lb_Field.String("status_file_url") + created_at = lb_Field.DateTime("created_at") + + project = Relationship.ToOne("Project") + created_by = Relationship.ToOne("User", False, "created_by") + + @property + def inputs(self) -> List[Dict[str, Any]]: + """ + Inputs for each individual annotation uploaded. + This should match the ndjson annotations that you have uploaded. + + Returns: + Uploaded ndjson. + + * This information will expire after 24 hours. + """ + return self._fetch_remote_ndjson(self.input_file_url) + + @property + def errors(self) -> List[Dict[str, Any]]: + """ + Errors for each individual annotation uploaded. This is a subset of statuses + + Returns: + List of dicts containing error messages. Empty list means there were no errors + See `BulkImportRequest.statuses` for more details. + + * This information will expire after 24 hours. + """ + self.wait_until_done() + return self._fetch_remote_ndjson(self.error_file_url) + + @property + def statuses(self) -> List[Dict[str, Any]]: + """ + Status for each individual annotation uploaded. + + Returns: + A status for each annotation if the upload is done running. 
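+            For example, a single status row might look like this
+            (illustrative values only):
+
+                {"uuid": "...", "dataRow": {"id": "..."}, "status": "FAILURE",
+                 "errors": [{"name": "...", "message": "..."}]}
+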
+ See below table for more details + + .. list-table:: + :widths: 15 150 + :header-rows: 1 + + * - Field + - Description + * - uuid + - Specifies the annotation for the status row. + * - dataRow + - JSON object containing the Labelbox data row ID for the annotation. + * - status + - Indicates SUCCESS or FAILURE. + * - errors + - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. + + * This information will expire after 24 hours. + """ + self.wait_until_done() + return self._fetch_remote_ndjson(self.status_file_url) + + @functools.lru_cache() + def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: + """ + Fetches the remote ndjson file and caches the results. + + Args: + url (str): Can be any url pointing to an ndjson file. + Returns: + ndjson as a list of dicts. + """ + response = requests.get(url) + response.raise_for_status() + return parser.loads(response.text) + + def refresh(self) -> None: + """Synchronizes values of all fields with the database.""" + query_str, params = query.get_single(BulkImportRequest, self.uid) + res = self.client.execute(query_str, params) + res = res[utils.camel_case(BulkImportRequest.type_name())] + self._set_field_values(res) + + def wait_till_done(self, sleep_time_seconds: int = 5) -> None: + self.wait_until_done(sleep_time_seconds) + + def wait_until_done(self, sleep_time_seconds: int = 5) -> None: + """Blocks import job until certain conditions are met. + + Blocks until the BulkImportRequest.state changes either to + `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, + periodically refreshing object's state. + + Args: + sleep_time_seconds (str): a time to block between subsequent API calls + """ + while self.state == BulkImportRequestState.RUNNING: + logger.info(f"Sleeping for {sleep_time_seconds} seconds...") + time.sleep(sleep_time_seconds) + self.__exponential_backoff_refresh() + + @retry.Retry( + predicate=retry.if_exception_type( + lb_exceptions.ApiLimitError, + lb_exceptions.TimeoutError, + lb_exceptions.NetworkError, + ) + ) + def __exponential_backoff_refresh(self) -> None: + self.refresh() + + @classmethod + def from_name( + cls, client, project_id: str, name: str + ) -> "BulkImportRequest": + """Fetches existing BulkImportRequest. + + Args: + client (Client): a Labelbox client + project_id (str): BulkImportRequest's project id + name (str): name of BulkImportRequest + Returns: + BulkImportRequest object + + """ + query_str = """query getBulkImportRequestPyApi( + $projectId: ID!, $name: String!) { + bulkImportRequest(where: { + projectId: $projectId, + name: $name + }) { + %s + } + } + """ % query.results_query_part(cls) + params = {"projectId": project_id, "name": name} + response = client.execute(query_str, params=params) + return cls(client, response["bulkImportRequest"]) + + @classmethod + def create_from_url( + cls, client, project_id: str, name: str, url: str, validate=True + ) -> "BulkImportRequest": + """ + Creates a BulkImportRequest from a publicly accessible URL + to an ndjson file with predictions. 
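+
+        A minimal, illustrative call (the URL and name below are
+        placeholders, not real values):
+
+            request = BulkImportRequest.create_from_url(
+                client, project_id, name="my-import",
+                url="https://example.com/predictions.ndjson",
+            )
+            request.wait_until_done()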
+ + Args: + client (Client): a Labelbox client + project_id (str): id of project for which predictions will be imported + name (str): name of BulkImportRequest + url (str): publicly accessible URL pointing to ndjson file containing predictions + validate (bool): a flag indicating if there should be a validation + if `url` is valid ndjson + Returns: + BulkImportRequest object + """ + if validate: + logger.warn( + "Validation is turned on. The file will be downloaded locally and processed before uploading." + ) + res = requests.get(url) + data = parser.loads(res.text) + _validate_ndjson(data, client.get_project(project_id)) + + query_str = """mutation createBulkImportRequestPyApi( + $projectId: ID!, $name: String!, $fileUrl: String!) { + createBulkImportRequest(data: { + projectId: $projectId, + name: $name, + fileUrl: $fileUrl + }) { + %s + } + } + """ % query.results_query_part(cls) + params = {"projectId": project_id, "name": name, "fileUrl": url} + bulk_import_request_response = client.execute(query_str, params=params) + return cls( + client, bulk_import_request_response["createBulkImportRequest"] + ) + + @classmethod + def create_from_objects( + cls, + client, + project_id: str, + name: str, + predictions: Union[Iterable[Dict], Iterable["Label"]], + validate=True, + ) -> "BulkImportRequest": + """ + Creates a `BulkImportRequest` from an iterable of dictionaries. + + Conforms to JSON predictions format, e.g.: + ``{ + "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", + "schemaId": "ckappz7d700gn0zbocmqkwd9i", + "dataRow": { + "id": "ck1s02fqxm8fi0757f0e6qtdc" + }, + "bbox": { + "top": 48, + "left": 58, + "height": 865, + "width": 1512 + } + }`` + + Args: + client (Client): a Labelbox client + project_id (str): id of project for which predictions will be imported + name (str): name of BulkImportRequest + predictions (Iterable[dict]): iterable of dictionaries representing predictions + validate (bool): a flag indicating if there should be a validation + if `predictions` is valid ndjson + Returns: + BulkImportRequest object + """ + if not isinstance(predictions, list): + raise TypeError( + f"annotations must be in a form of Iterable. Found {type(predictions)}" + ) + ndjson_predictions = serialize_labels(predictions) + + if validate: + _validate_ndjson(ndjson_predictions, client.get_project(project_id)) + + data_str = parser.dumps(ndjson_predictions) + if not data_str: + raise ValueError("annotations cannot be empty") + + data = data_str.encode("utf-8") + file_name = _make_file_name(project_id, name) + request_data = _make_request_data( + project_id, name, len(data_str), file_name + ) + file_data = (file_name, data, NDJSON_MIME_TYPE) + response_data = _send_create_file_command( + client, + request_data=request_data, + file_name=file_name, + file_data=file_data, + ) + + return cls(client, response_data["createBulkImportRequest"]) + + @classmethod + def create_from_local_file( + cls, client, project_id: str, name: str, file: Path, validate_file=True + ) -> "BulkImportRequest": + """ + Creates a BulkImportRequest from a local ndjson file with predictions. 
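+
+        A minimal, illustrative call (the file path and name below are
+        placeholders):
+
+            from pathlib import Path
+
+            request = BulkImportRequest.create_from_local_file(
+                client, project_id, name="my-import",
+                file=Path("predictions.ndjson"),
+            )
+            request.wait_until_done()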
+
+        Args:
+            client (Client): a Labelbox client
+            project_id (str): id of project for which predictions will be imported
+            name (str): name of BulkImportRequest
+            file (Path): local ndjson file with predictions
+            validate_file (bool): a flag indicating whether to validate that `file` is a valid ndjson file
+        Returns:
+            BulkImportRequest object
+
+        """
+        file_name = _make_file_name(project_id, name)
+        content_length = file.stat().st_size
+        request_data = _make_request_data(
+            project_id, name, content_length, file_name
+        )
+
+        with file.open("rb") as f:
+            if validate_file:
+                reader = parser.reader(f)
+                # ensure that the underlying json load call is valid
+                # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
+                # by iterating through the file so we only store
+                # each line in memory rather than the entire file
+                try:
+                    _validate_ndjson(reader, client.get_project(project_id))
+                except ValueError:
+                    raise ValueError(f"{file} is not a valid ndjson file")
+                else:
+                    f.seek(0)
+            file_data = (file.name, f, NDJSON_MIME_TYPE)
+            response_data = _send_create_file_command(
+                client, request_data, file_name, file_data
+            )
+        return cls(client, response_data["createBulkImportRequest"])
+
+    def delete(self) -> None:
+        """Deletes the import job and also any annotations created by this import.
+
+        Returns:
+            None
+        """
+        id_param = "bulk_request_id"
+        query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) {
+            deleteBulkImportRequest(where: {id: $%s}) {
+                id
+                name
+            }
+        }""" % (id_param, id_param)
+        self.client.execute(query_str, {id_param: self.uid})
+
+
+def _validate_ndjson(
+    lines: Iterable[Dict[str, Any]], project: "Project"
+) -> None:
+    """
+    Client side validation of an ndjson object.
+
+    Does not guarantee that an upload will succeed, for the following reasons:
+        * We are not checking the data row types, which will cause the following errors to slip through
+        * Missing frame indices will not cause an error for videos
+        * Uploaded annotations for the wrong data type will pass (e.g. entity on images)
+        * We are not checking bounds of an asset (e.g. frame index, image height, text location)
+
+    Args:
+        lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines
+        project (Project): the project for which predictions will be imported
+
+    Raises:
+        MALValidationError: Raised for invalid NDJson
+        UuidError: Duplicate UUID in upload
+    """
+    feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas(
+        project.ontology()
+    )
+    uids: Set[str] = set()
+    for idx, line in enumerate(lines):
+        try:
+            annotation = NDAnnotation(**line)
+            annotation.validate_instance(
+                feature_schemas_by_id, feature_schemas_by_name
+            )
+            uuid = str(annotation.uuid)
+            if uuid in uids:
+                raise lb_exceptions.UuidError(
+                    f"{uuid} already used in this import job, "
+                    "must be unique for the project."
+                )
+            uids.add(uuid)
+        except (ValidationError, ValueError, TypeError, KeyError) as e:
+            raise lb_exceptions.MALValidationError(
+                f"Invalid NDJson on line {idx}"
+            ) from e
+
+
+# The rest of this file contains objects for MAL validation
+def parse_classification(tool):
+    """
+    Parses a classification from an ontology.
+    Only radio, checklist, and text are supported for MAL.
+
+    Args:
+        tool (dict)
+
+    Returns:
+        dict
+    """
+    if tool["type"] in ["radio", "checklist"]:
+        option_schema_ids = [r["featureSchemaId"] for r in tool["options"]]
+        option_names = [r["value"] for r in tool["options"]]
+        return {
+            "tool": tool["type"],
+            "featureSchemaId": tool["featureSchemaId"],
+            "name": tool["name"],
+            "options": [*option_schema_ids, *option_names],
+        }
+    elif tool["type"] == "text":
+        return {
+            "tool": tool["type"],
+            "name": tool["name"],
+            "featureSchemaId": tool["featureSchemaId"],
+        }
+
+
+def get_mal_schemas(ontology):
+    """
+    Converts a project ontology to a dict for easier lookup during ndjson validation
+
+    Args:
+        ontology (Ontology)
+    Returns:
+        Dict, Dict: Useful for looking up a tool from a given feature schema id or name
+    """
+
+    valid_feature_schemas_by_schema_id = {}
+    valid_feature_schemas_by_name = {}
+    for tool in ontology.normalized["tools"]:
+        classifications = [
+            parse_classification(classification_tool)
+            for classification_tool in tool["classifications"]
+        ]
+        classifications_by_schema_id = {
+            v["featureSchemaId"]: v for v in classifications
+        }
+        classifications_by_name = {v["name"]: v for v in classifications}
+        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = {
+            "tool": tool["tool"],
+            "classificationsBySchemaId": classifications_by_schema_id,
+            "classificationsByName": classifications_by_name,
+            "name": tool["name"],
+        }
+        valid_feature_schemas_by_name[tool["name"]] = {
+            "tool": tool["tool"],
+            "classificationsBySchemaId": classifications_by_schema_id,
+            "classificationsByName": classifications_by_name,
+            "name": tool["name"],
+        }
+    for tool in ontology.normalized["classifications"]:
+        valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = (
+            parse_classification(tool)
+        )
+        valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool)
+    return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name
+
+
+class Bbox(BaseModel):
+    top: float
+    left: float
+    height: float
+    width: float
+
+
+class Point(BaseModel):
+    x: float
+    y: float
+
+
+class FrameLocation(BaseModel):
+    end: int
+    start: int
+
+
+class VideoSupported(BaseModel):
+    # Note that frames are only allowed as top level inferences for video
+    frames: Optional[List[FrameLocation]] = None
+
+
+# Base class for a special kind of union.
+class SpecialUnion:
+    def __new__(cls, **kwargs):
+        return cls.build(kwargs)
+
+    @classmethod
+    def __get_validators__(cls):
+        yield cls.build
+
+    @classmethod
+    def get_union_types(cls):
+        if not issubclass(cls, SpecialUnion):
+            raise TypeError(f"{cls} must be a subclass of SpecialUnion")
+
+        union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")]
+        if len(union_types) < 1:
+            raise TypeError(
+                f"Class {cls} should inherit from a union of objects to build"
+            )
+        if len(union_types) > 1:
+            raise TypeError(
+                f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}"
+            )
+        return union_types[0].__args__[0].__args__
+
+    @classmethod
+    def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase":
+        """
+        Checks through all objects in the union to see which matches the input data.
+        Args:
+            data (Union[dict, BaseModel]): The data for constructing one of the objects in the union
+        Raises:
+            KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion
+            ValidationError: Error while trying to construct a specific object in the union
+
+        """
+        if isinstance(data, BaseModel):
+            data = data.model_dump()
+
+        top_level_fields = []
+        max_match = 0
+        matched = None
+
+        for type_ in cls.get_union_types():
+            determinate_fields = _determinants(type_)
+            top_level_fields.append(determinate_fields)
+            matches = sum([val in determinate_fields for val in data])
+            if matches == len(determinate_fields) and matches > max_match:
+                max_match = matches
+                matched = type_
+
+        if matched is not None:
+            # These two have the exact same top level keys
+            if matched in [NDRadio, NDText]:
+                if isinstance(data["answer"], dict):
+                    matched = NDRadio
+                elif isinstance(data["answer"], str):
+                    matched = NDText
+                else:
+                    raise TypeError(
+                        f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict"
+                    )
+            return matched(**data)
+        else:
+            raise KeyError(
+                f"Invalid annotation. Must have one of the following keys: {top_level_fields}. Found {data}."
+            )
+
+    @classmethod
+    def schema(cls):
+        results = {"definitions": {}}
+        for cl in cls.get_union_types():
+            schema = cl.schema()
+            results["definitions"].update(schema.pop("definitions"))
+            results[cl.__name__] = schema
+        return results
+
+
+class DataRow(BaseModel):
+    id: str
+
+
+class NDFeatureSchema(BaseModel):
+    schemaId: Optional[str] = None
+    name: Optional[str] = None
+
+    @model_validator(mode="after")
+    def must_set_one(self):
+        if self.schemaId is None and self.name is None:
+            raise ValueError(
+                "Must set either schemaId or name for all feature schemas"
+            )
+        return self
+
+
+class NDBase(NDFeatureSchema):
+    ontology_type: str
+    uuid: UUID
+    dataRow: DataRow
+    model_config = ConfigDict(extra="forbid")
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        if self.name:
+            if self.name not in valid_feature_schemas_by_name:
+                raise ValueError(
+                    f"Name {self.name} is not valid for the provided project's ontology."
+                )
+
+            if (
+                self.ontology_type
+                != valid_feature_schemas_by_name[self.name]["tool"]
+            ):
+                raise ValueError(
+                    f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}"
+                )
+
+        if self.schemaId:
+            if self.schemaId not in valid_feature_schemas_by_id:
+                raise ValueError(
+                    f"Schema id {self.schemaId} is not valid for the provided project's ontology."
+                )
+
+            if (
+                self.ontology_type
+                != valid_feature_schemas_by_id[self.schemaId]["tool"]
+            ):
+                raise ValueError(
+                    f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}"
+                )
+
+    def validate_instance(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        self.validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+
+
+###### Classifications ######
+
+
+class NDText(NDBase):
+    ontology_type: Literal["text"] = "text"
+    answer: str = Field(json_schema_extra={"determinant": True})
+    # No feature schema to check
+
+
+class NDChecklist(VideoSupported, NDBase):
+    ontology_type: Literal["checklist"] = "checklist"
+    answers: List[NDFeatureSchema] = Field(
+        json_schema_extra={"determinant": True}
+    )
+
+    @field_validator("answers", mode="before")
+    def validate_answers(cls, value, field):
+        # constr not working with mypy.
+        if not len(value):
+            raise ValueError("Checklist answers should not be empty")
+        return value
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        # Test top level feature schema for this tool
+        super(NDChecklist, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        # Test the feature schemas provided to the answer field
+        if len(
+            set([answer.name or answer.schemaId for answer in self.answers])
+        ) != len(self.answers):
+            raise ValueError(
+                f"Duplicated featureSchema found for checklist {self.uuid}"
+            )
+        for answer in self.answers:
+            options = (
+                valid_feature_schemas_by_name[self.name]["options"]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId]["options"]
+            )
+            if answer.name not in options and answer.schemaId not in options:
+                raise ValueError(
+                    f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {answer}"
+                )
+
+
+class NDRadio(VideoSupported, NDBase):
+    ontology_type: Literal["radio"] = "radio"
+    answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True})
+
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        super(NDRadio, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        options = (
+            valid_feature_schemas_by_name[self.name]["options"]
+            if self.name
+            else valid_feature_schemas_by_id[self.schemaId]["options"]
+        )
+        if (
+            self.answer.name not in options
+            and self.answer.schemaId not in options
+        ):
+            raise ValueError(
+                f"Feature schema provided to {self.ontology_type} invalid. Expected one of {options}. Found {self.answer.name or self.answer.schemaId}"
+            )
+
+
+# A union with custom construction logic to improve error messages
+class NDClassification(
+    SpecialUnion,
+    Type[Union[NDText, NDRadio, NDChecklist]],  # type: ignore
+): ...
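+
+# A hedged usage sketch (illustration only; the ids below are placeholders reused
+# from the create_from_objects docstring above). Because every member type marks a
+# "determinant" field, NDClassification can be built directly from a raw ndjson
+# dict: a string "answer" dispatches to NDText, a dict "answer" to NDRadio.
+#
+#   classification = NDClassification(
+#       uuid="9fd9a92e-2560-4e77-81d4-b2e955800092",
+#       dataRow={"id": "ck1s02fqxm8fi0757f0e6qtdc"},
+#       schemaId="ckappz7d700gn0zbocmqkwd9i",
+#       answer="some free text",
+#   )  # SpecialUnion.build returns an NDText instance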
+
+
+###### Tools ######
+
+
+class NDBaseTool(NDBase):
+    classifications: List[NDClassification] = []
+
+    # This is independent of our problem
+    def validate_feature_schemas(
+        self, valid_feature_schemas_by_id, valid_feature_schemas_by_name
+    ):
+        super(NDBaseTool, self).validate_feature_schemas(
+            valid_feature_schemas_by_id, valid_feature_schemas_by_name
+        )
+        for classification in self.classifications:
+            classification.validate_feature_schemas(
+                valid_feature_schemas_by_name[self.name][
+                    "classificationsBySchemaId"
+                ]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId][
+                    "classificationsBySchemaId"
+                ],
+                valid_feature_schemas_by_name[self.name][
+                    "classificationsByName"
+                ]
+                if self.name
+                else valid_feature_schemas_by_id[self.schemaId][
+                    "classificationsByName"
+                ],
+            )
+
+    @field_validator("classifications", mode="before")
+    def validate_subclasses(cls, value, field):
+        # Create uuid and datarow id so we don't have to define classification objects twice
+        # This is caused by the fact that we require these ids for top level classifications but not for subclasses
+        results = []
+        dummy_id = "child".center(25, "_")
+        for row in value:
+            results.append(
+                {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())}
+            )
+        return results
+
+
+class NDPolygon(NDBaseTool):
+    ontology_type: Literal["polygon"] = "polygon"
+    polygon: List[Point] = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("polygon")
+    def is_geom_valid(cls, v):
+        if len(v) < 3:
+            raise ValueError(
+                f"A polygon must have at least 3 points to be valid. Found {v}"
+            )
+        return v
+
+
+class NDPolyline(NDBaseTool):
+    ontology_type: Literal["line"] = "line"
+    line: List[Point] = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("line")
+    def is_geom_valid(cls, v):
+        if len(v) < 2:
+            raise ValueError(
+                f"A line must have at least 2 points to be valid. Found {v}"
+            )
+        return v
+
+
+class NDRectangle(NDBaseTool):
+    ontology_type: Literal["rectangle"] = "rectangle"
+    bbox: Bbox = Field(json_schema_extra={"determinant": True})
+    # Could check if points are positive
+
+
+class NDPoint(NDBaseTool):
+    ontology_type: Literal["point"] = "point"
+    point: Point = Field(json_schema_extra={"determinant": True})
+    # Could check if points are positive
+
+
+class EntityLocation(BaseModel):
+    start: int
+    end: int
+
+
+class NDTextEntity(NDBaseTool):
+    ontology_type: Literal["named-entity"] = "named-entity"
+    location: EntityLocation = Field(json_schema_extra={"determinant": True})
+
+    @field_validator("location")
+    def is_valid_location(cls, v):
+        if isinstance(v, BaseModel):
+            v = v.model_dump()
+
+        if len(v) < 2:
+            raise ValueError(
+                f"A text entity location must have both a start and an end. Found {v}"
+            )
+        if v["start"] < 0:
+            raise ValueError(f"Text location must be positive. Found {v}")
+        if v["start"] > v["end"]:
+            raise ValueError(
+                f"Text start location must be less or equal than end. Found {v}"
+            )
+        return v
+
+
+class RLEMaskFeatures(BaseModel):
+    counts: List[int]
+    size: List[int]
+
+    @field_validator("counts")
+    def validate_counts(cls, counts):
+        if not all([count >= 0 for count in counts]):
+            raise ValueError(
+                "Found negative value for counts. They should all be zero or positive"
+            )
+        return counts
+
+    @field_validator("size")
+    def validate_size(cls, size):
+        if len(size) != 2:
+            raise ValueError(
+                f"Mask `size` should have two ints representing height and width. Found: {size}"
+            )
+        if not all([count > 0 for count in size]):
+            raise ValueError(
+                f"Mask `size` values should be positive ints. Found: {size}"
+            )
+        return size
+
+
+class PNGMaskFeatures(BaseModel):
+    # base64 encoded png bytes
+    png: str
+
+
+class URIMaskFeatures(BaseModel):
+    instanceURI: str
+    colorRGB: Union[List[int], Tuple[int, int, int]]
+
+    @field_validator("colorRGB")
+    def validate_color(cls, colorRGB):
+        # Does the dtype matter? Can it be a float?
+        if not isinstance(colorRGB, (tuple, list)):
+            raise ValueError(
+                f"Received color that is not a list or tuple. Found: {colorRGB}"
+            )
+        elif len(colorRGB) != 3:
+            raise ValueError(
+                f"Must provide RGB values for segmentation colors. Found: {colorRGB}"
+            )
+        elif not all([0 <= color <= 255 for color in colorRGB]):
+            raise ValueError(
+                f"All rgb colors must be between 0 and 255. Found: {colorRGB}"
+            )
+        return colorRGB
+
+
+class NDMask(NDBaseTool):
+    ontology_type: Literal["superpixel"] = "superpixel"
+    mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field(
+        json_schema_extra={"determinant": True}
+    )
+
+
+# A union with custom construction logic to improve error messages
+class NDTool(
+    SpecialUnion,
+    Type[  # type: ignore
+        Union[
+            NDMask,
+            NDTextEntity,
+            NDPoint,
+            NDRectangle,
+            NDPolyline,
+            NDPolygon,
+        ]
+    ],
+): ...
+
+
+class NDAnnotation(
+    SpecialUnion,
+    Type[Union[NDTool, NDClassification]],  # type: ignore
+):
+    @classmethod
+    def build(cls: Any, data) -> "NDBase":
+        if not isinstance(data, dict):
+            raise ValueError("value must be dict")
+        errors = []
+        for cl in cls.get_union_types():
+            try:
+                return cl(**data)
+            except KeyError as e:
+                errors.append(f"{cl.__name__}: {e}")
+
+        raise ValueError(
+            "Unable to construct any annotation.\n{}".format("\n".join(errors))
+        )
+
+    @classmethod
+    def schema(cls):
+        data = {"definitions": {}}
+        for type_ in cls.get_union_types():
+            schema_ = type_.schema()
+            data["definitions"].update(schema_.pop("definitions"))
+            data[type_.__name__] = schema_
+        return data
diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py
index dfc87c8a4..6f8aebc58 100644
--- a/libs/labelbox/src/labelbox/schema/enums.py
+++ b/libs/labelbox/src/labelbox/schema/enums.py
@@ -1,6 +1,31 @@
 from enum import Enum
 
 
+class BulkImportRequestState(Enum):
+    """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
+
+    If you are not using MEA, continue using BulkImportRequest.
+    AnnotationImports are in beta and will change soon.
+
+    .. list-table::
+       :widths: 15 150
+       :header-rows: 1
+
+       * - State
+         - Description
+       * - RUNNING
+         - Indicates that the import job is not done yet.
+       * - FAILED
+         - Indicates the import job failed. Check `BulkImportRequest.errors` for more information
+       * - FINISHED
+         - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information
+    """
+
+    RUNNING = "RUNNING"
+    FAILED = "FAILED"
+    FINISHED = "FINISHED"
+
+
 class AnnotationImportState(Enum):
     """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
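A hedged usage sketch tying these states together (illustration only; `predictions`
stands in for any iterable of ndjson prediction dicts): callers typically create an
import, poll it to completion, then inspect the per-annotation status rows:

    import_request = project.upload_annotations(
        name="my-import", annotations=predictions
    )
    import_request.wait_until_done()
    assert import_request.state == BulkImportRequestState.FINISHED
    failures = [s for s in import_request.statuses if s["status"] == "FAILURE"]
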
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f2de4db5e..f8876f7c4 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -5,29 +5,36 @@ import warnings from collections import namedtuple from datetime import datetime, timezone +from pathlib import Path from typing import ( TYPE_CHECKING, Any, Dict, + Iterable, List, Optional, Tuple, + TypeVar, Union, overload, ) +from urllib.parse import urlparse from labelbox.schema.labeling_service import ( LabelingService, LabelingServiceStatus, ) from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard +import requests +from labelbox import parser from labelbox import utils from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, + ResourceConflict, ResourceNotFoundError, ) from labelbox.orm import query @@ -39,6 +46,7 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, + validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -55,6 +63,7 @@ from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, + OntologyKind, UploadType, ) from labelbox.schema.project_overview import ( @@ -63,7 +72,7 @@ ) if TYPE_CHECKING: - pass + from labelbox import BulkImportRequest DataRowPriority = int @@ -570,7 +579,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - "This function has only been tested to work with the Editor front end. Found %s", + f"This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -805,7 +814,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - "Batch exceeds max size, break into smaller batches" + f"Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1079,7 +1088,8 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Batch was not created successfully: " + json.dumps(task.errors) + f"Batch was not created successfully: " + + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) @@ -1426,7 +1436,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Priority was not updated successfully: " + f"Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1478,6 +1488,33 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] + def bulk_import_requests(self) -> PaginatedCollection: + """Returns bulk import request objects which are used in model-assisted labeling. + These are returned with the oldest first, and most recent last. + """ + + id_param = "project_id" + query_str = """query ListAllImportRequestsPyApi($%s: ID!) 
{ + bulkImportRequests ( + where: { projectId: $%s } + skip: %%d + first: %%d + ) { + %s + } + }""" % ( + id_param, + id_param, + query.results_query_part(Entity.BulkImportRequest), + ) + return PaginatedCollection( + self.client, + query_str, + {id_param: str(self.uid)}, + ["bulkImportRequests"], + Entity.BulkImportRequest, + ) + def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1592,7 +1629,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Data rows were not moved successfully: " + f"Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1602,6 +1639,77 @@ def _wait_for_task(self, task_id: str) -> Task: return task + def upload_annotations( + self, + name: str, + annotations: Union[str, Path, Iterable[Dict]], + validate: bool = False, + ) -> "BulkImportRequest": # type: ignore + """Uploads annotations to a new Editor project. + + Args: + name (str): name of the BulkImportRequest job + annotations (str or Path or Iterable): + url that is publicly accessible by Labelbox containing an + ndjson file + OR local path to an ndjson file + OR iterable of annotation rows + validate (bool): + Whether or not to validate the payload before uploading. + Returns: + BulkImportRequest + """ + + if isinstance(annotations, str) or isinstance(annotations, Path): + + def _is_url_valid(url: Union[str, Path]) -> bool: + """Verifies that the given string is a valid url. + + Args: + url: string to be checked + Returns: + True if the given url is valid otherwise False + + """ + if isinstance(url, Path): + return False + parsed = urlparse(url) + return bool(parsed.scheme) and bool(parsed.netloc) + + if _is_url_valid(annotations): + return Entity.BulkImportRequest.create_from_url( + client=self.client, + project_id=self.uid, + name=name, + url=str(annotations), + validate=validate, + ) + else: + path = Path(annotations) + if not path.exists(): + raise FileNotFoundError( + f"{annotations} is not a valid url nor existing local file" + ) + return Entity.BulkImportRequest.create_from_local_file( + client=self.client, + project_id=self.uid, + name=name, + file=path, + validate_file=validate, + ) + elif isinstance(annotations, Iterable): + return Entity.BulkImportRequest.create_from_objects( + client=self.client, + project_id=self.uid, + name=name, + predictions=annotations, # type: ignore + validate=validate, + ) + else: + raise ValueError( + f"Invalid annotations given of type: {type(annotations)}" + ) + def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py new file mode 100644 index 000000000..9abae1422 --- /dev/null +++ b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py @@ -0,0 +1,258 @@ +from unittest.mock import patch +import uuid +from labelbox import parser, Project +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +import pytest +import random +from labelbox.data.annotation_types.annotation import ObjectAnnotation +from labelbox.data.annotation_types.classification.classification import ( + Checklist, + ClassificationAnnotation, + ClassificationAnswer, + Radio, +) +from labelbox.data.annotation_types.data.video import VideoData +from 
labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.rectangle import ( + Rectangle, + RectangleUnit, +) +from labelbox.data.annotation_types.label import Label +from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.ner import ( + DocumentEntity, + DocumentTextSelection, +) +from labelbox.data.annotation_types.video import VideoObjectAnnotation + +from labelbox.data.serialization import NDJsonConverter +from labelbox.exceptions import MALValidationError, UuidError +from labelbox.schema.bulk_import_request import BulkImportRequest +from labelbox.schema.enums import BulkImportRequestState +from labelbox.schema.annotation_import import LabelImport, MALPredictionImport +from labelbox.schema.media_type import MediaType + +""" +- Here we only want to check that the uploads are calling the validation +- Then with unit tests we can check the types of errors raised +""" +# TODO: remove library once bulk import requests are removed + + +@pytest.mark.order(1) +def test_create_from_url(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_file(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + with pytest.raises(MALValidationError): + module_project.upload_annotations( + name=name, annotations=url, validate=True + ) + # Schema ids shouldn't match + + +def test_create_from_objects( + module_project: Project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_create_from_label_objects( + module_project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + labels = list(NDJsonConverter.deserialize(predictions)) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=labels + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + normalized_predictions = list(NDJsonConverter.serialize(labels)) + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, normalized_predictions + ) + + +def test_create_from_local_file( + tmp_path, predictions, module_project, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + file_name = f"{name}.ndjson" + file_path = tmp_path / 
file_name + with file_path.open("w") as f: + parser.dump(predictions, f) + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=str(file_path), validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_get(client, module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + bulk_import_request = BulkImportRequest.from_name( + client, project_id=module_project.uid, name=name + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_ndjson(tmp_path, module_project): + file_name = f"broken.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + f.write("test") + + with pytest.raises(ValueError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + +def test_validate_ndjson_uuid(tmp_path, module_project, predictions): + file_name = f"repeat_uuid.ndjson" + file_path = tmp_path / file_name + repeat_uuid = predictions.copy() + uid = str(uuid.uuid4()) + repeat_uuid[0]["uuid"] = uid + repeat_uuid[1]["uuid"] = uid + + with file_path.open("w") as f: + parser.dump(repeat_uuid, f) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=repeat_uuid + ) + + +@pytest.mark.skip( + "Slow test and uses a deprecated api endpoint for annotation imports" +) +def test_wait_till_done(rectangle_inference, project): + name = str(uuid.uuid4()) + url = project.client.upload_data( + content=parser.dumps(rectangle_inference), sign=True + ) + bulk_import_request = project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert len(bulk_import_request.inputs) == 1 + bulk_import_request.wait_until_done() + assert bulk_import_request.state == BulkImportRequestState.FINISHED + + # Check that the status files are being returned as expected + assert len(bulk_import_request.errors) == 0 + assert len(bulk_import_request.inputs) == 1 + assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] + assert len(bulk_import_request.statuses) == 1 + assert bulk_import_request.statuses[0]["status"] == "SUCCESS" + assert ( + bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] + ) + + +def test_project_bulk_import_requests(module_project, predictions): + result = module_project.bulk_import_requests() + assert len(list(result)) == 0 + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + 
name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + result = module_project.bulk_import_requests() + assert len(list(result)) == 3 + + +def test_delete(module_project, predictions): + name = str(uuid.uuid4()) + + bulk_import_requests = module_project.bulk_import_requests() + [ + bulk_import_request.delete() + for bulk_import_request in bulk_import_requests + ] + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 1 + + bulk_import_request.delete() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index 9e8963a26..a0df559fc 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,8 +1,24 @@ +from labelbox.schema.media_type import MediaType +from labelbox.schema.project import Project import pytest -from pytest_cases import fixture_ref, parametrize + +from labelbox import parser +from pytest_cases import parametrize, fixture_ref from labelbox.exceptions import MALValidationError -from labelbox.schema.media_type import MediaType +from labelbox.schema.bulk_import_request import ( + NDChecklist, + NDClassification, + NDMask, + NDPolygon, + NDPolyline, + NDRadio, + NDRectangle, + NDText, + NDTextEntity, + NDTool, + _validate_ndjson, +) """ - These NDlabels are apart of bulkImportReqeust and should be removed once bulk import request is removed @@ -175,6 +191,39 @@ def test_missing_feature_schema(module_project, rectangle_inference): _validate_ndjson([pred], module_project) +def test_validate_ndjson(tmp_path, configured_project): + file_name = f"broken.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + f.write("test") + + with pytest.raises(ValueError): + configured_project.upload_annotations( + name="name", annotations=str(file_path), validate=True + ) + + +def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): + file_name = f"repeat_uuid.ndjson" + file_path = tmp_path / file_name + repeat_uuid = predictions.copy() + repeat_uuid[0]["uuid"] = "test_uuid" + repeat_uuid[1]["uuid"] = "test_uuid" + + with file_path.open("w") as f: + parser.dump(repeat_uuid, f) + + with pytest.raises(MALValidationError): + configured_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + with pytest.raises(MALValidationError): + configured_project.upload_annotations( + name="name", validate=True, annotations=repeat_uuid + ) + + @pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) def test_video_upload(video_checklist_inference, configured_project): pred = video_checklist_inference[0].copy() diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json new file mode 100644 index 000000000..4de15e217 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json @@ -0,0 +1,54 
@@ +[ + { + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "confidence": 0.8, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" + }, + { + "answer": [ + { + "schemaId": "ckrb1sfl8099e0y919v260awv", + "confidence": 0.82, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } + ], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json new file mode 100644 index 000000000..83a95e5bf --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json @@ -0,0 +1,25 @@ +[{ + "location": { + "start": 67, + "end": 128 + }, + "messageId": "some-message-id", + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] +}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 75fe36e44..91563b8ae 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,17 +8,16 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - }, - "classifications": [] + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -29,17 +28,20 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - }, - "classifications": [] + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -48,39 +50,762 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + 
"x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + 
}, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 }, { - "x": 15.0, - "y": 20.0 + "x": 1119, + "y": 934 }, { - "x": 20.0, - "y": 25.0 + "x": 1118, + "y": 935 }, { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json new file mode 100644 index 000000000..591e40cf6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json @@ -0,0 +1,823 @@ +[ + { + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + "schemaId": "ckrazcueb16og0z6609jj7y3y", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + { + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + 
"schemaId": "ckrazcuec16ok0z66f956apb7", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.834, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } + }, + { + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + "schemaId": "ckrazcuec16oi0z66dzrd8pfl", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.986, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "polygon": [ + { + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, 
+ "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 + } + ] + }, + { + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + "schemaId": "ckrazcuec16om0z66bhhh4tp7", + "dataRow": { + "globalKey": 
"05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "point": { + "x": 2122, + "y": 1457 + } + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 466a03594..82be4cdab 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,86 +1,826 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "ckrazcueb16og0z6609jj7y3y", + "name": "box a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": [], - "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "confidence": 0.854, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.7 } - ], - "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - } + ] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "ckrazcuec16ok0z66f956apb7", + "name": "mask a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": [], - "confidence": 0.834, + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + }, + "confidence": 0.685, "customMetrics": [ { "name": "customMetric1", - "value": 0.3 + "value": 0.4 + }, + { + "name": "customMetric2", + "value": 0.9 } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - } + ] }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "ckrazcuec16oi0z66dzrd8pfl", + "name": "polygon a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.986, + "confidence": 0.71, "customMetrics": [ { "name": "customMetric1", - "value": 0.9 + "value": 0.1 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 
929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 }, { - "x": 15.0, - "y": 20.0 + "x": 1099, + "y": 911 }, { - "x": 20.0, - "y": 25.0 + "x": 1100, + "y": 911 }, { - "x": 10.0, - "y": 20.0 + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, 
+ "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "ckrazcuec16om0z66bhhh4tp7", + "name": "point a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, + "confidence": 0.77, + "customMetrics": [ + { + "name": "customMetric2", + "value": 1.2 + } + ], "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json new file mode 100644 index 000000000..31be5a4c7 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json @@ -0,0 +1,10 @@ +[ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "aggregation": "ARITHMETIC_MEAN", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "metricValue": 0.1 + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json new file mode 100644 index 000000000..f4b4894f6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json @@ -0,0 +1,155 @@ +[{ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 4, + "unit": "POINTS", + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 162.73, + "left": 32.45, + "height": 388.16999999999996, + "width": 101.66000000000001 + } +}, { + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "bbox": { + "top": 223.26, + "left": 251.42, + "height": 457.03999999999996, + "width": 186.78 + } +}, { + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": 
[], + "page": 6, + "unit": "POINTS", + "confidence": 0.99, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 32.52, + "left": 218.17, + "height": 231.73, + "width": 110.56000000000003 + } +}, { + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "confidence": 0.89, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 117.39, + "left": 4.25, + "height": 456.9200000000001, + "width": 164.83 + } +}, { + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 8, + "unit": "POINTS", + "bbox": { + "top": 82.13, + "left": 217.28, + "height": 279.76, + "width": 82.43000000000004 + } +}, { + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 3, + "unit": "POINTS", + "bbox": { + "top": 298.12, + "left": 83.34, + "height": 203.83000000000004, + "width": 0.37999999999999545 + } +}, +{ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "named_entity", + "classifications": [], + "textSelections": [ + { + "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + "tokenIds": [ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c" + ], + "page": 1 + } + ] +} +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json new file mode 100644 index 000000000..d6a9eecbd --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json @@ -0,0 +1,36 @@ +[ + { + "line": [ + { + "x": 2534.353, + "y": 249.471 + }, + { + "x": 2429.492, + "y": 182.092 + }, + { + "x": 2294.322, + "y": 221.962 + } + ], + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-line", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.58, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json new file mode 100644 index 000000000..1f26d8dc8 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json @@ -0,0 +1,26 @@ +[ + { + "location": { + "start": 67, + "end": 128 + }, + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + 
"schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json new file mode 100644 index 000000000..11e0753d9 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json @@ -0,0 +1,166 @@ +[{ + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" + }, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{ + "start": 30, + "end": 35 + }, { + "start": 50, + "end": 51 + }] +}, { + "answer": [{ + "schemaId": "ckrb1sfl8099e0y919v260awv" + }], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{ + "start": 0, + "end": 5 + }] +}, { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" +}, { + "classifications": [], + "schemaId": + "cl5islwg200gfci6g0oitaypu", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [{ + "keyframes": [{ + "frame": 1, + "line": [{ + "x": 10.0, + "y": 10.0 + }, { + "x": 100.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }, { + "frame": 5, + "line": [{ + "x": 15.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 8, + "line": [{ + "x": 100.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [{ + "keyframes": [{ + "frame": 1, + "point": { + "x": 10.0, + "y": 10.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 5, + "point": { + "x": 50.0, + "y": 50.0 + }, + "classifications": [] + }, { + "frame": 10, + "point": { + "x": 10.0, + "y": 50.0 + }, + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5iw0roz00lwci6g5jni62vs", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [{ + "keyframes": [{ + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0 + }, + "classifications": [] + }, { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0 + }, + "classifications": [] + }] + }] +}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 59f568c75..0bc3c8924 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ 
b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py
@@ -37,6 +37,13 @@ def test_serialization_min():
     res.pop("uuid")
     assert res == expected
 
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    for i, annotation in enumerate(res.annotations):
+        annotation.extra.pop("uuid")
+        assert annotation.value == label.annotations[i].value
+        assert annotation.name == label.annotations[i].name
+
 
 def test_serialization_with_classification():
     label = Label(
@@ -127,6 +134,12 @@ def test_serialization_with_classification():
     res.pop("uuid")
     assert res == expected
 
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    assert res.model_dump(exclude_none=True) == label.model_dump(
+        exclude_none=True
+    )
+
 
 def test_serialization_with_classification_double_nested():
     label = Label(
@@ -220,6 +233,13 @@ def test_serialization_with_classification_double_nested():
     res.pop("uuid")
     assert res == expected
 
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    res.annotations[0].extra.pop("uuid")
+    assert res.model_dump(exclude_none=True) == label.model_dump(
+        exclude_none=True
+    )
+
 
 def test_serialization_with_classification_double_nested_2():
     label = Label(
@@ -310,3 +330,9 @@ def test_serialization_with_classification_double_nested_2():
     res = next(serialized)
     res.pop("uuid")
     assert res == expected
+
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    assert res.model_dump(exclude_none=True) == label.model_dump(
+        exclude_none=True
+    )
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
index 82adce99c..8dcb17f0b 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py
@@ -1,73 +1,15 @@
 import json
 
-from labelbox.data.annotation_types.classification.classification import (
-    Checklist,
-    Radio,
-    Text,
-)
-from labelbox.data.annotation_types.data.generic_data_row_data import (
-    GenericDataRowData,
-)
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
-from labelbox.types import (
-    Label,
-    ClassificationAnnotation,
-    ClassificationAnswer,
-)
-from labelbox.data.mixins import CustomMetric
-
 
 
 def test_classification():
     with open(
         "tests/data/assets/ndjson/classification_import.json", "r"
     ) as file:
         data = json.load(file)
-
-    label = Label(
-        data=GenericDataRowData(
-            uid="ckrb1sf1i1g7i0ybcdc6oc8ct",
-        ),
-        annotations=[
-            ClassificationAnnotation(
-                feature_schema_id="ckrb1sfjx099a0y914hl319ie",
-                extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"},
-                value=Radio(
-                    answer=ClassificationAnswer(
-                        custom_metrics=[
-                            CustomMetric(name="customMetric1", value=0.5),
-                            CustomMetric(name="customMetric2", value=0.3),
-                        ],
-                        confidence=0.8,
-                        feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb",
-                    ),
-                ),
-            ),
-            ClassificationAnnotation(
-                feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
-                extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"},
-                value=Checklist(
-                    answer=[
-                        ClassificationAnswer(
-                            custom_metrics=[
-                                CustomMetric(name="customMetric1", value=0.5),
-                                CustomMetric(name="customMetric2", value=0.3),
-                            ],
-                            confidence=0.82,
-                            feature_schema_id="ckrb1sfl8099e0y919v260awv",
-                        )
-                    ],
-                ),
-            ),
-            ClassificationAnnotation(
-                feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
-                extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"},
-                value=Text(answer="a value"),
-            ),
-        ],
-    )
-
-
res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -76,48 +18,6 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - label = Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - name="classification a", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="choice 1", - ), - ), - ), - ClassificationAnnotation( - name="classification b", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.945, - name="choice 2", - ) - ], - ), - ), - ClassificationAnnotation( - name="classification c", - extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, - value=Text(answer="a value"), - ), - ], - ) - - res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 561f9ce86..f7da9181b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,12 +1,8 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -103,62 +99,25 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - -def test_conversation_entity_import(): - with open( - "tests/data/assets/ndjson/conversation_entity_import.json", "r" - ) as file: - data = json.load(file) - - label = lb_types.Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - lb_types.ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, - value=lb_types.ConversationEntity( - start=67, end=128, message_id="some-message-id" - ), - ) - ], - ) - - res = list(NDJsonConverter.serialize([label])) - assert res == data + deserialized_label = list(NDJsonConverter().deserialize(ndjson)) + deserialized_label[0].annotations[0].extra.pop("uuid") + assert deserialized_label[0].model_dump(exclude_none=True) == label[ + 0 + ].model_dump(exclude_none=True) -def test_conversation_entity_import_without_confidence(): - with open( +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/conversation_entity_import.json", "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_conversation_entity_import(filename: str): + with 
open(filename, "r") as file:
         data = json.load(file)
-    label = lb_types.Label(
-        uid=None,
-        data=GenericDataRowData(
-            uid="cl6xnv9h61fv0085yhtoq06ht",
-        ),
-        annotations=[
-            lb_types.ObjectAnnotation(
-                name="some-text-entity",
-                feature_schema_id="cl6xnuwt95lqq07330tbb3mfd",
-                extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"},
-                value=lb_types.ConversationEntity(
-                    start=67, end=128, extra={}, message_id="some-message-id"
-                ),
-            )
-        ],
-    )
-
-    res = list(NDJsonConverter.serialize([label]))
+    res = list(NDJsonConverter.deserialize(data))
+    res = list(NDJsonConverter.serialize(res))
 
     assert res == data
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py
index 999e1bda5..333c00250 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py
@@ -1,29 +1,67 @@
+from copy import copy
+import pytest
 import labelbox.types as lb_types
 from labelbox.data.serialization import NDJsonConverter
+from labelbox.data.serialization.ndjson.objects import (
+    NDDicomSegments,
+    NDDicomSegment,
+    NDDicomLine,
+)
+
+"""
+Data gen prompt test data
+"""
+
+prompt_text_annotation = lb_types.PromptClassificationAnnotation(
+    feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
+    name="test",
+    value=lb_types.PromptText(
+        answer="the answer to the text questions right here"
+    ),
+)
+
+prompt_text_ndjson = {
+    "answer": "the answer to the text questions right here",
+    "name": "test",
+    "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
+    "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+}
+
+data_gen_label = lb_types.Label(
+    data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
+    annotations=[prompt_text_annotation],
+)
+
+"""
+Prompt annotation test
+"""
 
 
 def test_serialize_label():
-    prompt_text_annotation = lb_types.PromptClassificationAnnotation(
-        feature_schema_id="ckrb1sfkn099c0y910wbo0p1a",
-        name="test",
-        extra={"uuid": "test"},
-        value=lb_types.PromptText(
-            answer="the answer to the text questions right here"
-        ),
-    )
+    serialized_label = next(NDJsonConverter().serialize([data_gen_label]))
+    # Remove the uuid field, since it is a randomly generated value that cannot be specified; it is also meant for relationships
+    del serialized_label["uuid"]
+    assert serialized_label == prompt_text_ndjson
+
 
-    prompt_text_ndjson = {
-        "answer": "the answer to the text questions right here",
-        "name": "test",
-        "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
-        "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
-        "uuid": "test",
-    }
-
-    data_gen_label = lb_types.Label(
-        data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"},
-        annotations=[prompt_text_annotation],
+def test_deserialize_label():
+    deserialized_label = next(
+        NDJsonConverter().deserialize([prompt_text_ndjson])
     )
-    serialized_label = next(NDJsonConverter().serialize([data_gen_label]))
+    if hasattr(deserialized_label.annotations[0], "extra"):
+        # Extra fields are added to the deserialized label by default and need to be removed to match
+        deserialized_label.annotations[0].extra = {}
+    assert deserialized_label.model_dump(
+        exclude_none=True
+    ) == data_gen_label.model_dump(exclude_none=True)
 
-    assert serialized_label == prompt_text_ndjson
+
+def test_serialize_deserialize_label():
+    serialized = list(NDJsonConverter.serialize([data_gen_label]))
+    deserialized = next(NDJsonConverter.deserialize(serialized))
+    if hasattr(deserialized.annotations[0], "extra"):
+        # Extra fields are added to the deserialized label by default and need to be removed to match
+
deserialized.annotations[0].extra = {} + assert deserialized.model_dump( + exclude_none=True + ) == data_gen_label.model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 762891aa2..633214367 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,5 +1,6 @@ from copy import copy import pytest +import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -180,3 +181,28 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson + + +@pytest.mark.parametrize("label, ndjson", labels_ndjsons) +def test_deserialize_label(label, ndjson): + deserialized_label = next(NDJsonConverter().deserialize([ndjson])) + if hasattr(deserialized_label.annotations[0], "extra"): + deserialized_label.annotations[0].extra = {} + for i, annotation in enumerate(deserialized_label.annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value + + +@pytest.mark.parametrize("label", labels) +def test_serialize_deserialize_label(label): + serialized = list(NDJsonConverter.serialize([label])) + deserialized = list(NDJsonConverter.deserialize(serialized)) + if hasattr(deserialized[0].annotations[0], "extra"): + deserialized[0].annotations[0].extra = {} + for i, annotation in enumerate(deserialized[0].annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index a0897ad9f..5fe6a9789 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,19 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - RectangleUnit, - Point, - DocumentRectangle, - DocumentEntity, - DocumentTextSelection, -) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -66,144 +53,10 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - labels = [ - Label( - uid=None, - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": 
"20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.89, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - ) - ] - ), - ) - ], - ), - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_with_name_only(): @@ -212,135 +65,26 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id=None, - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, 
y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.74, - name="boxy", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - ) - ] - ), - ) - ], - ), - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson + + +def test_pdf_bbox_deserialize(): + deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) + deserialized[0].annotations[0].extra = {} + assert ( + deserialized[0].annotations[0].value + == bbox_labels[0].annotations[0].value + ) + assert ( + deserialized[0].annotations[0].name + == bbox_labels[0].annotations[0].name + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 1ab678cde..4adcd9935 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,6 +9,8 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=VideoData( uid="cklr9mr4m5iao0rb6cvxu4qbn", + file_path=None, + frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -20,7 +22,6 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", - "uuid": "b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, 
value=Rectangle(
                 extra={},
@@ -587,4 +588,31 @@ def test_serialize_video_objects():
     serialized_labels = NDJsonConverter.serialize([label])
     label = next(serialized_labels)
-    assert label == video_serialized_bbox_label()
+    manual_label = video_serialized_bbox_label()
+
+    for key in label.keys():
+        # Ignore uuid because we randomize it if there was none
+        if key != "uuid":
+            assert label[key] == manual_label[key]
+
+    assert len(label["segments"]) == 2
+    assert len(label["segments"][0]["keyframes"]) == 2
+    assert len(label["segments"][1]["keyframes"]) == 4
+
+    # Converting back returns only the keyframes; their total should be the sum over all previous segments
+    deserialized_labels = NDJsonConverter.deserialize([label])
+    label = next(deserialized_labels)
+    assert len(label.annotations) == 6
+
+
+def test_confidence_is_ignored():
+    label = video_bbox_label()
+    serialized_labels = NDJsonConverter.serialize([label])
+    label = next(serialized_labels)
+    label["confidence"] = 0.453
+    label["segments"][0]["confidence"] = 0.453
+
+    deserialized_labels = NDJsonConverter.deserialize([label])
+    label = next(deserialized_labels)
+    for annotation in label.annotations:
+        assert annotation.confidence is None
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py
index 349be13a8..84c017497 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py
@@ -34,6 +34,16 @@ def test_serialization():
     assert res["answer"] == "text_answer"
     assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d"
 
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+
+    annotation = res.annotations[0]
+
+    annotation_value = annotation.value
+    assert type(annotation_value) is Text
+    assert annotation_value.answer == "text_answer"
+    assert annotation_value.confidence == 0.5
+
 
 def test_nested_serialization():
     label = Label(
@@ -92,3 +102,19 @@ def test_nested_serialization():
     assert sub_classification["name"] == "nested answer"
     assert sub_classification["answer"] == "nested answer"
     assert sub_classification["confidence"] == 0.7
+
+    deserialized = NDJsonConverter.deserialize([res])
+    res = next(deserialized)
+    annotation = res.annotations[0]
+    answer = annotation.value.answer[0]
+    assert answer.confidence == 0.9
+    assert answer.name == "first_answer"
+
+    classification_answer = answer.classifications[0].value.answer
+    assert classification_answer.confidence == 0.8
+    assert classification_answer.name == "first_sub_radio_answer"
+
+    sub_classification_answer = classification_answer.classifications[0].value
+    assert type(sub_classification_answer) is Text
+    assert sub_classification_answer.answer == "nested answer"
+    assert sub_classification_answer.confidence == 0.7
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py
index d104a691e..2b3fa7f8c 100644
--- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py
@@ -1,74 +1,73 @@
-from labelbox.data.annotation_types.data.generic_data_row_data import (
-    GenericDataRowData,
-)
+import json
+import pytest
+
+from labelbox.data.serialization.ndjson.classification import NDRadio
+
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
-from labelbox.types import (
-    Label,
-    ClassificationAnnotation,
-    Radio,
-    ClassificationAnswer,
-)
+from labelbox.data.serialization.ndjson.objects import NDLine -def test_generic_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def round_dict(data): + if isinstance(data, dict): + for key in data: + if isinstance(data[key], float): + data[key] = int(data[key]) + elif isinstance(data[key], dict): + data[key] = round_dict(data[key]) + elif isinstance(data[key], (list, tuple)): + data[key] = [round_dict(r) for r in data[key]] - label = Label( - data=GenericDataRowData( - global_key="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) + return data + + +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/classification_import_global_key.json", + "tests/data/assets/ndjson/metric_import_global_key.json", + "tests/data/assets/ndjson/polyline_import_global_key.json", + "tests/data/assets/ndjson/text_entity_import_global_key.json", + "tests/data/assets/ndjson/conversation_entity_import_global_key.json", + ], +) +def test_many_types(filename: str): + with open(filename, "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == data + f.close() - res = list(NDJsonConverter.serialize([label])) - assert res == expected +def test_image(): + with open( + "tests/data/assets/ndjson/image_import_global_key.json", "r" + ) as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() -def test_dict_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def test_pdf(): + with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() - label = Label( - data={ - "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct", - }, - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) - res = list(NDJsonConverter.serialize([label])) +def test_video(): + with open( + "tests/data/assets/ndjson/video_import_global_key.json", "r" + ) as f: + data = json.load(f) - assert res == expected + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] + f.close() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index d67acb9c3..1729e1f46 100644 --- 
a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,8 +1,4 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -14,7 +10,6 @@ ImageData, MaskData, ) -from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -34,74 +29,12 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.9) - ], - confidence=0.986, - feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_image_with_name_only(): @@ -110,74 +43,11 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - name="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - name="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.9) - ], - 
confidence=0.986, - name="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - name="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask(): @@ -187,11 +57,10 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], - "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -199,54 +68,16 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": (255, 0, 0), + "colorRGB": [255, 0, 0], }, - "classifications": [], }, ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) - mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) - mask_numpy = mask_numpy.astype(np.uint8) - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.8, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Mask( - mask=MaskData(arr=mask_numpy), - color=(1, 1, 1), - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - extra={}, - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=(255, 0, 0), - ), - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask_from_arr(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 40e098405..45c5c67bf 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,166 +1,38 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.annotation_types.metrics.confusion_matrix import ( - ConfusionMatrixMetric, -) from 
labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ScalarMetric, - ScalarMetricAggregation, - ConfusionMatrixAggregation, -) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ScalarMetric( - value=0.1, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - assert res == data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert reserialized == data def test_custom_scalar_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: 0.1, 0.2: 0.5}, - "metricName": "custom_iou", - "aggregation": "SUM", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ScalarMetric( - value=0.1, - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value=0.1, - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value={"0.1": 0.1, "0.2": 0.5}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_scalar_import.json", "r" + ) as file: + data = json.load(file) - assert res == data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) def test_custom_confusion_matrix_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (1, 1, 2, 3), - "metricName": "50%_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (0, 1, 2, 5), - "metricName": "50%_iou", - "featureName": "sample_class", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - "metricName": "50%_iou", - "aggregation": "CONFUSION_MATRIX", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ConfusionMatrixMetric( - 
value=(1, 1, 2, 3), - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value=(0, 1, 2, 5), - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" + ) as file: + data = json.load(file) - assert data == res + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 202f793fe..69594ff73 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,125 +1,32 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest from labelbox.data.serialization import NDJsonConverter -from labelbox.types import ( - Label, - MessageEvaluationTaskAnnotation, - MessageSingleSelectionTask, - MessageMultiSelectionTask, - MessageInfo, - OrderedMessageInfo, - MessageRankingTask, -) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cnjencjencjfencvj", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="single-selection", - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, - value=MessageSingleSelectionTask( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - parent_message_id="clxfznjb800073b6v43ppx9ca", - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cfcerfvergerfefj", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="multi-selection", - extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, - value=MessageMultiSelectionTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - selected_messages=[ - MessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - ) - ], - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="ranking", - extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=2, - ), - ], - ), - ) - ], - ), - ] + deserialized = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(deserialized)) - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert data == reserialized def 
test_mesage_ranking_task_wrong_order_serialization(): + with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: + data = json.load(file) + + some_ranking_task = next( + task + for task in data + if task["messageEvaluationTask"]["format"] == "message-ranking" + ) + some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ + "order" + ] = 3 + with pytest.raises(ValueError): - ( - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="ranking", - extra={ - "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" - }, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - ], - ), - ) - ], - ), - ) + list(NDJsonConverter.deserialize([some_ranking_task])) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py new file mode 100644 index 000000000..790bd87b3 --- /dev/null +++ b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py @@ -0,0 +1,19 @@ +import json +from labelbox.data.serialization.ndjson.label import NDLabel +from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle +import pytest + + +def test_bad_annotation_input(): + data = [{"test": 3}] + with pytest.raises(ValueError): + NDLabel(**{"annotations": data}) + + +def test_correct_annotation_input(): + with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: + data = json.load(f) + assert isinstance( + NDLabel(**{"annotations": [data[0]]}).annotations[0], + NDDocumentRectangle, + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index 3633c9cbe..e0f0df0e6 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,135 +1,13 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Rectangle, - Point, - ClassificationAnnotation, - Radio, - ClassificationAnswer, - Text, - Checklist, -) def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.34, - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": 
"d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "5d03213e-4408-456c-9eca-cf0723202961", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.894, - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={}, - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -138,112 +16,6 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="box a", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.811, - name="first answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box b", - extra={ - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification b", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.815, - name="second answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification c", - value=Checklist( - answer=[ - ClassificationAnswer( - name="third answer", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="a string", - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + 
res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index cd11d97fe..97d48a14e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,76 +1,18 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ObjectAnnotation, Point, Line, Label - - -def test_polyline_import_with_confidence(): - with open( - "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" - ) as file: - data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-line", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_polyline_import_without_confidence(): - with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/polyline_without_confidence_import.json", + "tests/data/assets/ndjson/polyline_import.json", + ], +) +def test_polyline_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.58, - name="some-line", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 4458e335c..bd80f9267 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,3 +1,4 @@ +import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -39,6 +40,14 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + + for i, annotation in enumerate(res.annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_serialization_with_radio_classification(): label = Label( @@ -92,3 +101,10 @@ def test_serialization_with_radio_classification(): res = next(serialized) 
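
The test refactors in this patch all follow the same shape: instead of hand-building Label objects as Python literals, the NDJSON asset file is deserialized into annotation types and serialized back, and the round trip must reproduce the input exactly. A minimal sketch of the pattern, assuming one of the asset files referenced above:

    import json

    from labelbox.data.serialization.ndjson.converter import NDJsonConverter

    with open("tests/data/assets/ndjson/nested_import.json", "r") as file:
        data = json.load(file)

    # NDJSON dicts -> annotation types -> NDJSON dicts; both converters yield lazily,
    # so each side is drained into a list before comparing
    labels = list(NDJsonConverter.deserialize(data))
    round_tripped = list(NDJsonConverter.serialize(labels))
    assert round_tripped == data

Keeping the expected payload only in the asset file, rather than duplicating it as Python literals, is what lets these tests shrink so sharply.
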
res.pop("uuid") assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.annotations[0].model_dump( + exclude_none=True + ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 0e42ab152..66630dbb5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,10 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -12,26 +8,8 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="bbox", - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - }, - value=Rectangle( - start=Point(x=38.0, y=28.0), - end=Point(x=81.0, y=69.0), - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -61,6 +39,8 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, }, ) @@ -68,9 +48,8 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - data = list(NDJsonConverter.serialize([label])) - - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def test_rectangle_mixed_start_end_points(): @@ -97,13 +76,17 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, + }, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - data = list(NDJsonConverter.serialize([label])) - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index 235b66957..f33719035 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,135 +1,16 @@ import json +from uuid import uuid4 -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Point, - Rectangle, - RelationshipAnnotation, - Relationship, -) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = [ - Label( - data=GenericDataRowData( - uid="clf98gj90000qp38ka34yhptl", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": 
"d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - extra={}, - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="clf98gj90000qp38ka34yhptl-DIFFERENT", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - ] + res = list(NDJsonConverter.deserialize(data)) res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -163,3 +44,29 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] + + +def test_relationship_nonexistent_object(): + with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: + data = json.load(file) + + relationship_annotation = data[2] + source_uuid = relationship_annotation["relationship"]["source"] + target_uuid = str(uuid4()) + relationship_annotation["relationship"]["target"] = target_uuid + error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" + + with pytest.raises(ValueError, match=error_msg): + list(NDJsonConverter.deserialize(data)) + + +def test_relationship_duplicate_uuids(): + with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: + data = json.load(file) + + source, target = data[0], data[1] + target["uuid"] = source["uuid"] + error_msg = f"UUID '{source['uuid']}' is not unique" + + with pytest.raises(AssertionError, match=error_msg): + list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 21db389cb..d5e81c51a 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,5 +1,7 @@ from 
labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( + ClassificationAnswer, + Radio, Text, ) from labelbox.data.annotation_types.data.text import TextData @@ -32,3 +34,11 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index fb93f15d4..3e856f001 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,68 +1,21 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, TextEntity - - -def test_text_entity_import(): - with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: - data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_text_entity_import_without_confidence(): - with open( +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/text_entity_import.json", "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_text_entity_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 4fba5c2ca..c7a6535c4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,10 +1,10 @@ import json +from labelbox.client import Client from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, Radio, - Text, ) from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.geometry.line import Line @@ -13,10 +13,8 @@ 
from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.video import ( - VideoClassificationAnnotation, - VideoObjectAnnotation, -) +from labelbox.data.annotation_types.video import VideoObjectAnnotation +from labelbox import parser from labelbox.data.serialization.ndjson.converter import NDJsonConverter from operator import itemgetter @@ -26,275 +24,15 @@ def test_video(): with open("tests/data/assets/ndjson/video_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), - annotations=[ - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=30, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=31, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=32, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=33, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=34, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=35, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=50, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - frame=51, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=0, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=1, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - 
), - frame=2, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=3, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=4, - ), - VideoClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - frame=5, - ), - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c"}, - value=Text(answer="a value"), - ), - VideoObjectAnnotation( - feature_schema_id="cl5islwg200gfci6g0oitaypu", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=10.0, y=10.0), - Point(x=100.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5islwg200gfci6g0oitaypu", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=15.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5islwg200gfci6g0oitaypu", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=100.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=8, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=10.0), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=50.0, y=50.0), - frame=5, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=50.0), - frame=10, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - feature_schema_id="cl5iw0roz00lwci6g5jni62vs", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=10.0), - end=Point(x=155.0, y=110.0), - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5iw0roz00lwci6g5jni62vs", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=30.0), - end=Point(x=155.0, y=80.0), - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - feature_schema_id="cl5iw0roz00lwci6g5jni62vs", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=200.0, y=300.0), - end=Point(x=350.0, y=700.0), - ), - frame=10, - keyframe=True, - segment_index=1, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) data = sorted(data, key=itemgetter("uuid")) 
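
Serialization order of the video annotations is not guaranteed to match the asset file, so both sides are sorted by uuid before comparison, and the new assertion below walks the pairs one at a time so a failure points at a single mismatched annotation rather than one huge list diff. The comparison pattern in isolation:

    from operator import itemgetter

    data = sorted(data, key=itemgetter("uuid"))
    res = sorted(res, key=itemgetter("uuid"))
    for expected, actual in zip(data, res):
        # a per-item assertion yields a readable failure message
        assert expected == actual
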
res = sorted(res, key=itemgetter("uuid")) - assert data == res + + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_name_only(): @@ -302,274 +40,16 @@ def test_video_name_only(): "tests/data/assets/ndjson/video_import_name_only.json", "r" ) as file: data = json.load(file) - labels = [ - Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), - annotations=[ - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=30, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=31, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=32, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=33, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=34, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=35, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=50, - ), - VideoClassificationAnnotation( - name="question 1", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - name="answer 1", - ), - ), - frame=51, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=0, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=1, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=2, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=3, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=4, - ), - VideoClassificationAnnotation( - name="question 2", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - name="answer 2", - ) - ], - ), - frame=5, - ), - ClassificationAnnotation( - name="question 3", - extra={"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3"}, - value=Text(answer="a value"), - ), - VideoObjectAnnotation( - name="segment 1", - extra={"uuid": 
"6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=10.0, y=10.0), - Point(x=100.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 1", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=15.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 1", - extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, - value=Line( - points=[ - Point(x=100.0, y=10.0), - Point(x=50.0, y=100.0), - Point(x=50.0, y=30.0), - ], - ), - frame=8, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - name="segment 2", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=10.0), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 2", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=50.0, y=50.0), - frame=5, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - name="segment 2", - extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, - value=Point(x=10.0, y=50.0), - frame=10, - keyframe=True, - segment_index=1, - ), - VideoObjectAnnotation( - name="segment 3", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=10.0), - end=Point(x=155.0, y=110.0), - ), - frame=1, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 3", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=5.0, y=30.0), - end=Point(x=155.0, y=80.0), - ), - frame=5, - keyframe=True, - segment_index=0, - ), - VideoObjectAnnotation( - name="segment 3", - extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, - value=Rectangle( - start=Point(x=200.0, y=300.0), - end=Point(x=350.0, y=700.0), - ), - frame=10, - keyframe=True, - segment_index=1, - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - assert data == res + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_classification_global_subclassifications(): @@ -587,6 +67,7 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( + name="checklist", answer=[ ClassificationAnswer( name="first_checklist_answer", @@ -613,7 +94,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = { + expected_second_annotation = nested_checklist_annotation_ndjson = { "name": "nested_checklist_question", "answer": [ { @@ -635,6 +116,12 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + assert annotation.name == label.annotations[i].name + def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -800,6 +287,14 @@ def test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected + deserialized = 
NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_video_classification_point(): bbox_annotation = [ @@ -950,6 +445,13 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + def test_video_classification_frameline(): bbox_annotation = [ @@ -1117,289 +619,9 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - -[ - { - "answer": "a value", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 5, "start": 0}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5islwg200gfci6g0oitaypu", - "segments": [ - { - "keyframes": [ - { - "classifications": [], - "frame": 1, - "line": [ - {"x": 10.0, "y": 10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - { - "classifications": [], - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - } - ] - }, - ], - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "segments": [ - { - "keyframes": [ - { - "classifications": [], - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - } - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - }, - { - "classifications": [], - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - }, - ] - }, - ], - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "segments": [ - { - "keyframes": [ - { - "bbox": { - "height": 100.0, - "left": 5.0, - "top": 10.0, - "width": 150.0, - }, - "classifications": [], - "frame": 1, - }, - { - "bbox": { - "height": 50.0, - "left": 5.0, - "top": 30.0, - "width": 150.0, - }, - "classifications": [], - "frame": 5, - }, - ] - }, - { - "keyframes": [ - { - "bbox": { - "height": 400.0, - "left": 200.0, - "top": 300.0, - "width": 150.0, - }, - "classifications": [], - "frame": 10, - } - ] - }, - ], - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - }, -] - -[ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - 
"dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], - }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{"start": 0, "end": 5}], - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "classifications": [], - "schemaId": "cl5islwg200gfci6g0oitaypu", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "line": [ - {"x": 10.0, "y": 10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - { - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - } - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - "classifications": [], - } - ] - }, - { - "keyframes": [ - { - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - "classifications": [], - }, - { - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - "classifications": [], - }, - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0, - }, - "classifications": [], - }, - { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0, - }, - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0, - }, - "classifications": [], - } - ] - }, - ], - }, -] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value From 1c50842070900615b2cebbf4124d0859d476ffce Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 23 Sep 2024 11:53:40 -0700 Subject: [PATCH 06/22] Fix exception type for labeling service test (#1835) --- libs/labelbox/src/labelbox/client.py | 41 +++++++++---------- .../integration/test_labeling_service.py | 9 ++-- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index b0b5a1407..055bee676 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -9,10 +9,9 @@ import urllib.parse from collections import defaultdict from datetime import datetime, timezone -from typing import Any, List, Dict, Union, Optional, overload, Callable from types import MappingProxyType +from typing import Any, Dict, List, Optional, Union, overload 
-from labelbox.schema.search_filters import SearchFilter import requests import requests.exceptions from google.api_core import retry @@ -26,20 +25,18 @@ from labelbox.orm.model import Entity, Field from labelbox.pagination import PaginatedCollection from labelbox.schema import role -from labelbox.schema.conflict_resolution_strategy import ( - ConflictResolutionStrategy, -) -from labelbox.schema.data_row import DataRow from labelbox.schema.catalog import Catalog +from labelbox.schema.data_row import DataRow from labelbox.schema.data_row_metadata import DataRowMetadataOntology from labelbox.schema.dataset import Dataset from labelbox.schema.embedding import Embedding from labelbox.schema.enums import CollectionJobStatus from labelbox.schema.foundry.foundry_client import FoundryClient from labelbox.schema.iam_integration import IAMIntegration -from labelbox.schema.identifiables import DataRowIds -from labelbox.schema.identifiables import GlobalKeys +from labelbox.schema.identifiables import DataRowIds, GlobalKeys +from labelbox.schema.label_score import LabelScore from labelbox.schema.labeling_frontend import LabelingFrontend +from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard from labelbox.schema.media_type import ( MediaType, get_media_type_validation_error, @@ -47,40 +44,40 @@ from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig from labelbox.schema.model_run import ModelRun -from labelbox.schema.ontology import Ontology, DeleteFeatureFromOntologyResult from labelbox.schema.ontology import ( - Tool, Classification, + DeleteFeatureFromOntologyResult, FeatureSchema, + Ontology, PromptResponseClassification, + Tool, +) +from labelbox.schema.ontology_kind import ( + EditorTaskType, + EditorTaskTypeMapper, + OntologyKind, ) from labelbox.schema.organization import Organization from labelbox.schema.project import Project from labelbox.schema.quality_mode import ( - QualityMode, BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS, BENCHMARK_AUTO_AUDIT_PERCENTAGE, CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS, CONSENSUS_AUTO_AUDIT_PERCENTAGE, + QualityMode, ) from labelbox.schema.queue_mode import QueueMode from labelbox.schema.role import Role +from labelbox.schema.search_filters import SearchFilter from labelbox.schema.send_to_annotate_params import ( SendToAnnotateFromCatalogParams, + build_annotations_input, build_destination_task_queue_input, build_predictions_input, - build_annotations_input, ) from labelbox.schema.slice import CatalogSlice, ModelSlice -from labelbox.schema.task import Task, DataUpsertTask +from labelbox.schema.task import DataUpsertTask, Task from labelbox.schema.user import User -from labelbox.schema.label_score import LabelScore -from labelbox.schema.ontology_kind import ( - OntologyKind, - EditorTaskTypeMapper, - EditorTaskType, -) -from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard logger = logging.getLogger(__name__) @@ -540,7 +537,7 @@ def upload_data( error_msg = next(iter(errors), {}).get( "message", "Unknown error" ) - except Exception as e: + except Exception: error_msg = "Unknown error" raise labelbox.exceptions.LabelboxError( "Failed to upload, message: %s" % error_msg @@ -842,7 +839,7 @@ def create_dataset( if not validation_result["validateDataset"]["valid"]: raise labelbox.exceptions.LabelboxError( - f"IAMIntegration was not successfully added to the dataset." + "IAMIntegration was not successfully added to the dataset." 
) except Exception as e: dataset.delete() diff --git a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py index 09b5c24a1..04a1cb507 100644 --- a/libs/labelbox/tests/integration/test_labeling_service.py +++ b/libs/labelbox/tests/integration/test_labeling_service.py @@ -1,6 +1,9 @@ import pytest -from labelbox.exceptions import LabelboxError, ResourceNotFoundError +from labelbox.exceptions import ( + MalformedQueryException, + ResourceNotFoundError, +) from labelbox.schema.labeling_service import LabelingServiceStatus @@ -51,7 +54,7 @@ def test_request_labeling_service_moe_project( labeling_service = project.get_labeling_service() with pytest.raises( - LabelboxError, + MalformedQueryException, match='[{"errorType":"PROJECT_MODEL_CONFIG","errorMessage":"Project model config is not completed"}]', ): labeling_service.request() @@ -73,5 +76,5 @@ def test_request_labeling_service_incomplete_requirements(ontology, project): ): # No labeling service by default labeling_service.request() project.connect_ontology(ontology) - with pytest.raises(LabelboxError): + with pytest.raises(MalformedQueryException): labeling_service.request() From 929ca7f4f6dd13c26e7d1aa881b1e3956c708034 Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:10:52 -0500 Subject: [PATCH 07/22] Added merge environmental settings to prepared request (#1811) --- libs/labelbox/src/labelbox/client.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 055bee676..02c93850e 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -254,7 +254,13 @@ def convert_value(value): prepped: requests.PreparedRequest = request.prepare() - response = self._connection.send(prepped, timeout=timeout) + settings = self._connection.merge_environment_settings( + prepped.url, {}, None, None, None + ) + + response = self._connection.send( + prepped, timeout=timeout, **settings + ) logger.debug("Response: %s", response.text) except requests.exceptions.Timeout as e: raise labelbox.exceptions.TimeoutError(str(e)) From 8724901519bc3954627d6d82dd85584724be8411 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 24 Sep 2024 10:55:39 -0700 Subject: [PATCH 08/22] [PLT-1572] Add wait for label processing for annotation upload tests (#1842) --- .../test_generic_data_types.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py index 9de67bd4e..76236f0dd 100644 --- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py +++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py @@ -1,17 +1,18 @@ import datetime +import itertools +import uuid + +import pytest + +import labelbox as lb +from labelbox import Client, OntologyKind, Project +from labelbox.data.annotation_types import Label from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.annotation_types import Label -import pytest -import uuid - -import labelbox as lb -from labelbox.schema.media_type import MediaType from labelbox.schema.annotation_import import AnnotationImportState -from labelbox import Project, Client, 
OntologyKind -import itertools +from labelbox.schema.media_type import MediaType """ - integration test for importing mal labels and ground truths with each supported MediaType. @@ -129,6 +130,7 @@ def test_import_media_types( export_v2_test_helpers, helpers, media_type, + wait_for_label_processing, ): annotations_ndjson = list( itertools.chain.from_iterable(annotations_by_media_type[media_type]) @@ -145,6 +147,8 @@ def test_import_media_types( assert label_import.state == AnnotationImportState.FINISHED assert len(label_import.errors) == 0 + wait_for_label_processing(configured_project)[0] + result = export_v2_test_helpers.run_project_export_v2_task( configured_project ) From 3493ef986e768b283babd1084e68400b71c06e4d Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 27 Sep 2024 09:45:10 -0700 Subject: [PATCH 09/22] [PLT-1492] Fix 'flaky' tests spotted during a prod run (#1846) --- libs/labelbox/tests/conftest.py | 66 +++++++++++++------ .../test_generic_data_types.py | 52 +++++++++++++-- .../integration/schema/test_user_group.py | 32 +++------ libs/labelbox/tests/integration/test_label.py | 4 +- 4 files changed, 103 insertions(+), 51 deletions(-) diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py index 6d13a8d83..49eab165d 100644 --- a/libs/labelbox/tests/conftest.py +++ b/libs/labelbox/tests/conftest.py @@ -1,35 +1,39 @@ -from datetime import datetime -from random import randint -from string import ascii_letters - import json import os import re -import uuid import time -from labelbox.schema.project import Project -import requests -from labelbox.schema.ontology import Ontology -import pytest -from types import SimpleNamespace -from typing import Type +import uuid +from datetime import datetime from enum import Enum -from typing import Tuple +from random import randint +from string import ascii_letters +from types import SimpleNamespace +from typing import Tuple, Type + +import pytest +import requests -from labelbox import Dataset, DataRow -from labelbox import MediaType +from labelbox import ( + Classification, + Client, + DataRow, + Dataset, + LabelingFrontend, + MediaType, + OntologyBuilder, + Option, + Tool, +) +from labelbox.exceptions import LabelboxError from labelbox.orm import query from labelbox.pagination import PaginatedCollection +from labelbox.schema.annotation_import import LabelImport +from labelbox.schema.enums import AnnotationImportState from labelbox.schema.invite import Invite +from labelbox.schema.ontology import Ontology +from labelbox.schema.project import Project from labelbox.schema.quality_mode import QualityMode from labelbox.schema.queue_mode import QueueMode -from labelbox import Client - -from labelbox import LabelingFrontend -from labelbox import OntologyBuilder, Tool, Option, Classification -from labelbox.schema.annotation_import import LabelImport -from labelbox.schema.enums import AnnotationImportState -from labelbox.exceptions import LabelboxError IMG_URL = "https://picsum.photos/200/300.jpg" MASKABLE_IMG_URL = "https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg" @@ -1255,6 +1259,21 @@ def teardown_ontology_feature_schemas(ontology: Ontology): class ModuleTearDownHelpers(TearDownHelpers): ... 
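
The LabelHelpers class added just below follows the same poll-with-timeout shape as the wait_for_label_processing fixture used earlier in this patch series: retry a read until the backend catches up, and fail loudly once a deadline passes. A generic sketch of that idea (this helper is illustrative, not part of the SDK):

    import time

    def wait_until(predicate, timeout_seconds=10, interval_seconds=2):
        # Poll until predicate returns a truthy value or the deadline elapses.
        deadline = time.monotonic() + timeout_seconds
        while time.monotonic() < deadline:
            result = predicate()
            if result:
                return result
            time.sleep(interval_seconds)
        raise TimeoutError("Timed out waiting for condition")

    # usage, e.g.: labels = wait_until(lambda: list(project.labels()))

Replacing fixed sleeps and single reads with a bounded retry loop is what removes the flakiness named in the commit title.
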
+class LabelHelpers: + def wait_for_labels(self, project, number_of_labels=1): + timeout_seconds = 10 + while True: + labels = list(project.labels()) + if len(labels) >= number_of_labels: + return labels + timeout_seconds -= 2 + if timeout_seconds <= 0: + raise TimeoutError( + f"Timed out waiting for label for project '{project.uid}' to finish processing" + ) + time.sleep(2) + + @pytest.fixture def teardown_helpers(): return TearDownHelpers() @@ -1263,3 +1282,8 @@ def teardown_helpers(): @pytest.fixture(scope="module") def module_teardown_helpers(): return TearDownHelpers() + + +@pytest.fixture +def label_helpers(): + return LabelHelpers() diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py index 76236f0dd..1cc5538d9 100644 --- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py +++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py @@ -112,11 +112,6 @@ def test_generic_data_row_type_by_global_key( (MediaType.Conversational, MediaType.Conversational), (MediaType.Document, MediaType.Document), (MediaType.Dicom, MediaType.Dicom), - ( - MediaType.LLMPromptResponseCreation, - MediaType.LLMPromptResponseCreation, - ), - (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation), (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation), (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation), ], @@ -186,6 +181,53 @@ def test_import_media_types( assert exported_annotations == expected_data +@pytest.mark.parametrize( + "configured_project, media_type", + [ + ( + MediaType.LLMPromptResponseCreation, + MediaType.LLMPromptResponseCreation, + ), + (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation), + ], + indirect=["configured_project"], +) +def test_import_media_types_llm( + client: Client, + configured_project: Project, + annotations_by_media_type, + exports_v2_by_media_type, + export_v2_test_helpers, + helpers, + media_type, + wait_for_label_processing, +): + annotations_ndjson = list( + itertools.chain.from_iterable(annotations_by_media_type[media_type]) + ) + + label_import = lb.LabelImport.create_from_objects( + client, + configured_project.uid, + f"test-import-{media_type}", + annotations_ndjson, + ) + label_import.wait_until_done() + + assert label_import.state == AnnotationImportState.FINISHED + assert len(label_import.errors) == 0 + + all_annotations = sorted([a["uuid"] for a in annotations_ndjson]) + successful_annotations = sorted( + [ + status["uuid"] + for status in label_import.statuses + if status["status"] == "SUCCESS" + ] + ) + assert successful_annotations == all_annotations + + @pytest.mark.parametrize( "configured_project_by_global_key, media_type", [ diff --git a/libs/labelbox/tests/integration/schema/test_user_group.py b/libs/labelbox/tests/integration/schema/test_user_group.py index 6aebd4e89..50aaad4a7 100644 --- a/libs/labelbox/tests/integration/schema/test_user_group.py +++ b/libs/labelbox/tests/integration/schema/test_user_group.py @@ -1,13 +1,13 @@ -import pytest -import faker from uuid import uuid4 -from labelbox import Client -from labelbox.schema.user_group import UserGroup, UserGroupColor + +import faker +import pytest + from labelbox.exceptions import ( - ResourceNotFoundError, ResourceCreationError, - UnprocessableEntityError, + ResourceNotFoundError, ) +from labelbox.schema.user_group import UserGroup, UserGroupColor data = faker.Faker() @@ -147,9 +147,6 @@ def test_cannot_update_group_id(user_group): def 
test_get_user_groups_with_creation_deletion(client): user_group = None try: - # Get all user groups - user_groups = list(UserGroup(client).get_user_groups()) - # manual delete for iterators group_name = data.name() user_group = UserGroup(client) @@ -157,25 +154,12 @@ def test_get_user_groups_with_creation_deletion(client): user_group.create() user_groups_post_creation = list(UserGroup(client).get_user_groups()) + assert user_group in user_groups_post_creation - # Verify that at least one user group is returned - assert len(user_groups_post_creation) > 0 - assert len(user_groups_post_creation) == len(user_groups) + 1 - - # Verify that each user group has a valid ID and name - for ug in user_groups_post_creation: - assert ug.id is not None - assert ug.name is not None - - user_group.delete() user_group = None user_groups_post_deletion = list(UserGroup(client).get_user_groups()) - - assert ( - len(user_groups_post_deletion) == len(user_groups_post_creation) - 1 - ) - + assert user_group not in user_groups_post_deletion finally: if user_group: user_group.delete() diff --git a/libs/labelbox/tests/integration/test_label.py b/libs/labelbox/tests/integration/test_label.py index 1bd8a8276..0daa4758e 100644 --- a/libs/labelbox/tests/integration/test_label.py +++ b/libs/labelbox/tests/integration/test_label.py @@ -41,11 +41,13 @@ def test_label_update(configured_project_with_label): assert label.label == "something else" -def test_label_filter_order(configured_project_with_label): +def test_label_filter_order(configured_project_with_label, label_helpers): project, _, _, label = configured_project_with_label l1 = label project.create_label() + label_helpers.wait_for_labels(project, 2) + l2 = next(project.labels()) assert set(project.labels()) == {l1, l2} From 45ce0a370c275393908e5fbfa1f86a34f4e01bfe Mon Sep 17 00:00:00 2001 From: sfendell-labelbox <150080555+sfendell-labelbox@users.noreply.github.com> Date: Fri, 27 Sep 2024 10:57:35 -0700 Subject: [PATCH 10/22] Support percent for rectangles. 
(#1848) --- .../src/labelbox/data/annotation_types/geometry/rectangle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/geometry/rectangle.py b/libs/labelbox/src/labelbox/data/annotation_types/geometry/rectangle.py index 5cabf0957..af37734b0 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/geometry/rectangle.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/geometry/rectangle.py @@ -94,6 +94,7 @@ class RectangleUnit(Enum): INCHES = "INCHES" PIXELS = "PIXELS" POINTS = "POINTS" + PERCENT = "PERCENT" class DocumentRectangle(Rectangle): From 2d2def25c8c54623446fdc50464fba2ef2c461db Mon Sep 17 00:00:00 2001 From: sfendell-labelbox <150080555+sfendell-labelbox@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:12:22 -0700 Subject: [PATCH 11/22] v.5.1.0 release prep pr (#1849) --- docs/conf.py | 2 +- libs/labelbox/CHANGELOG.md | 5 +++++ libs/labelbox/pyproject.toml | 2 +- libs/labelbox/src/labelbox/__init__.py | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index a67a44a24..51648857e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ project = 'Python SDK reference' copyright = '2024, Labelbox' author = 'Labelbox' -release = '5.0.0' +release = '5.1.0' # -- General configuration --------------------------------------------------- diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md index b2d41b56d..6b23cf6bc 100644 --- a/libs/labelbox/CHANGELOG.md +++ b/libs/labelbox/CHANGELOG.md @@ -1,4 +1,9 @@ # Changelog +# Version 5.1.0 (2024-09-27) +## Fixed +* Support self-signed SSL certs([#1811](https://github.com/Labelbox/labelbox-python/pull/1811)) +* Rectangle units now correctly support percent inputs([#1848](https://github.com/Labelbox/labelbox-python/pull/1848)) + # Version 5.0.0 (2024-09-16) ## Updated * Set tasks_remaining_count to None LabelingServiceDashboard if labeling has not started ([#1817](https://github.com/Labelbox/labelbox-python/pull/1817)) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index f4c24af59..bc13a34d7 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "labelbox" -version = "5.0.0" +version = "5.1.0" description = "Labelbox Python API" authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }] dependencies = [ diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 5b5ac1f67..981520719 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -1,6 +1,6 @@ name = "labelbox" -__version__ = "5.0.0" +__version__ = "5.1.0" from labelbox.client import Client from labelbox.schema.project import Project From cba1ce4c29038665c1b74a27ac0d48c24ac5ebcf Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 1 Oct 2024 14:26:29 -0700 Subject: [PATCH 12/22] [PLT-1611] Vb/placeholder datarows (#1851) --- libs/labelbox/src/labelbox/schema/dataset.py | 38 ++-- libs/labelbox/tests/integration/conftest.py | 79 +++++--- .../tests/integration/test_data_rows.py | 186 ++++++++++-------- .../tests/integration/test_mmc_data_rows.py | 58 ++++++ 4 files changed, 229 insertions(+), 132 deletions(-) create mode 100644 libs/labelbox/tests/integration/test_mmc_data_rows.py diff --git a/libs/labelbox/src/labelbox/schema/dataset.py b/libs/labelbox/src/labelbox/schema/dataset.py index 04877c885..16c993dfa 100644 --- a/libs/labelbox/src/labelbox/schema/dataset.py +++ 
b/libs/labelbox/src/labelbox/schema/dataset.py @@ -1,57 +1,43 @@ -from datetime import datetime -from typing import Dict, Generator, List, Optional, Any, Final, Tuple, Union -import os import json import logging -from collections.abc import Iterable -from string import Template -import time +import os import warnings - -from labelbox import parser -from itertools import islice - from concurrent.futures import ThreadPoolExecutor, as_completed -from io import StringIO -import requests +from itertools import islice +from string import Template +from typing import Any, Dict, List, Optional, Tuple, Union +import labelbox.schema.internal.data_row_uploader as data_row_uploader from labelbox.exceptions import ( InvalidQueryError, LabelboxError, - ResourceNotFoundError, ResourceCreationError, + ResourceNotFoundError, ) +from labelbox.orm import query from labelbox.orm.comparison import Comparison -from labelbox.orm.db_object import DbObject, Updateable, Deletable, experimental +from labelbox.orm.db_object import DbObject, Deletable, Updateable from labelbox.orm.model import Entity, Field, Relationship -from labelbox.orm import query -from labelbox.exceptions import MalformedQueryException from labelbox.pagination import PaginatedCollection from labelbox.schema.data_row import DataRow -from labelbox.schema.embedding import EmbeddingVector from labelbox.schema.export_filters import DatasetExportFilters, build_filters from labelbox.schema.export_params import ( CatalogExportParams, validate_catalog_export_params, ) from labelbox.schema.export_task import ExportTask -from labelbox.schema.identifiable import UniqueId, GlobalKey -from labelbox.schema.task import Task, DataUpsertTask -from labelbox.schema.user import User from labelbox.schema.iam_integration import IAMIntegration +from labelbox.schema.identifiable import GlobalKey, UniqueId from labelbox.schema.internal.data_row_upsert_item import ( + DataRowCreateItem, DataRowItemBase, DataRowUpsertItem, - DataRowCreateItem, -) -import labelbox.schema.internal.data_row_uploader as data_row_uploader -from labelbox.schema.internal.descriptor_file_creator import ( - DescriptorFileCreator, ) from labelbox.schema.internal.datarow_upload_constants import ( FILE_UPLOAD_THREAD_COUNT, UPSERT_CHUNK_SIZE_BYTES, ) +from labelbox.schema.task import DataUpsertTask, Task logger = logging.getLogger(__name__) @@ -359,7 +345,7 @@ def data_row_for_external_id(self, external_id) -> "DataRow": ) if len(data_rows) > 1: logger.warning( - f"More than one data_row has the provided external_id : `%s`. Use function data_rows_for_external_id to fetch all", + "More than one data_row has the provided external_id : `%s`. 
Use function data_rows_for_external_id to fetch all", external_id, ) return data_rows[0] diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index c917a6164..10b05681e 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -1,42 +1,53 @@ -from collections import defaultdict -from itertools import islice -import json import os import sys -import re import time -import uuid -import requests -from types import SimpleNamespace -from typing import Type, List -from enum import Enum -from typing import Tuple +from collections import defaultdict +from datetime import datetime, timezone +from itertools import islice +from typing import Type import pytest -import requests -from labelbox import Dataset, DataRow -from labelbox import LabelingFrontend from labelbox import ( - OntologyBuilder, - Tool, - Option, Classification, + Client, + Dataset, + LabelingFrontend, MediaType, + OntologyBuilder, + Option, PromptResponseClassification, ResponseOption, + Tool, ) -from labelbox.orm import query -from labelbox.pagination import PaginatedCollection -from labelbox.schema.annotation_import import LabelImport -from labelbox.schema.catalog import Catalog -from labelbox.schema.enums import AnnotationImportState -from labelbox.schema.invite import Invite -from labelbox.schema.quality_mode import QualityMode +from labelbox.schema.data_row import DataRowMetadataField +from labelbox.schema.ontology_kind import OntologyKind from labelbox.schema.queue_mode import QueueMode from labelbox.schema.user import User -from labelbox import Client -from labelbox.schema.ontology_kind import OntologyKind + + +@pytest.fixture +def constants(): + SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal" + TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt" + TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh" + CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb" + EXPECTED_METADATA_SCHEMA_IDS = [ + SPLIT_SCHEMA_ID, + TEST_SPLIT_ID, + TEXT_SCHEMA_ID, + CAPTURE_DT_SCHEMA_ID, + ] + CUSTOM_TEXT_SCHEMA_NAME = "custom_text" + + return { + "SPLIT_SCHEMA_ID": SPLIT_SCHEMA_ID, + "TEST_SPLIT_ID": TEST_SPLIT_ID, + "TEXT_SCHEMA_ID": TEXT_SCHEMA_ID, + "CAPTURE_DT_SCHEMA_ID": CAPTURE_DT_SCHEMA_ID, + "EXPECTED_METADATA_SCHEMA_IDS": EXPECTED_METADATA_SCHEMA_IDS, + "CUSTOM_TEXT_SCHEMA_NAME": CUSTOM_TEXT_SCHEMA_NAME, + } @pytest.fixture @@ -835,3 +846,21 @@ def print_perf_summary(): for aaa in islice(sorted_dict, num_of_entries) ] print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) + + +@pytest.fixture +def make_metadata_fields(constants): + msg = "A message" + time = datetime.now(timezone.utc) + + fields = [ + DataRowMetadataField( + schema_id=constants["SPLIT_SCHEMA_ID"], + value=constants["TEST_SPLIT_ID"], + ), + DataRowMetadataField( + schema_id=constants["CAPTURE_DT_SCHEMA_ID"], value=time + ), + DataRowMetadataField(schema_id=constants["TEXT_SCHEMA_ID"], value=msg), + ] + return fields diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index 7f69c2995..9f0429269 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -1,44 +1,37 @@ -from tempfile import NamedTemporaryFile -import uuid -from datetime import datetime import json -import requests import os - +import uuid +from datetime import datetime, timezone +from tempfile import NamedTemporaryFile from unittest.mock import patch + import pytest +import requests -from 
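A short sketch of how a test might consume the constants and make_metadata_fields fixtures defined above; the test name is illustrative, and the subset assertion relies on every generated field using one of the reserved schema ids:

def test_metadata_fields_use_expected_schemas(constants, make_metadata_fields):
    fields = make_metadata_fields
    assert len(fields) == 3
    schema_ids = {field.schema_id for field in fields}
    # All three fields should draw from the reserved metadata schema ids.
    assert schema_ids <= set(constants["EXPECTED_METADATA_SCHEMA_IDS"])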
labelbox.schema.media_type import MediaType -from labelbox import DataRow, AssetAttachment +from labelbox import AssetAttachment, DataRow from labelbox.exceptions import ( + InvalidQueryError, MalformedQueryException, ResourceCreationError, - InvalidQueryError, ) -from labelbox.schema.task import Task, DataUpsertTask from labelbox.schema.data_row_metadata import ( DataRowMetadataField, DataRowMetadataKind, ) - -SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal" -TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt" -TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh" -CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb" -EXPECTED_METADATA_SCHEMA_IDS = [ - SPLIT_SCHEMA_ID, - TEST_SPLIT_ID, - TEXT_SCHEMA_ID, - CAPTURE_DT_SCHEMA_ID, -].sort() -CUSTOM_TEXT_SCHEMA_NAME = "custom_text" +from labelbox.schema.media_type import MediaType +from labelbox.schema.task import Task @pytest.fixture -def mdo(client): +def mdo( + client, + constants, +): mdo = client.get_data_row_metadata_ontology() try: - mdo.create_schema(CUSTOM_TEXT_SCHEMA_NAME, DataRowMetadataKind.string) + mdo.create_schema( + constants["CUSTOM_TEXT_SCHEMA_NAME"], DataRowMetadataKind.string + ) except MalformedQueryException: # Do nothing if already exists pass @@ -93,26 +86,18 @@ def tile_content(): } -def make_metadata_fields(): - msg = "A message" - time = datetime.utcnow() - - fields = [ - DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID, value=TEST_SPLIT_ID), - DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time), - DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg), - ] - return fields - - -def make_metadata_fields_dict(): +@pytest.fixture +def make_metadata_fields_dict(constants): msg = "A message" - time = datetime.utcnow() + time = datetime.now(timezone.utc) fields = [ - {"schema_id": SPLIT_SCHEMA_ID, "value": TEST_SPLIT_ID}, - {"schema_id": CAPTURE_DT_SCHEMA_ID, "value": time}, - {"schema_id": TEXT_SCHEMA_ID, "value": msg}, + { + "schema_id": constants["SPLIT_SCHEMA_ID"], + "value": constants["TEST_SPLIT_ID"], + }, + {"schema_id": constants["CAPTURE_DT_SCHEMA_ID"], "value": time}, + {"schema_id": constants["TEXT_SCHEMA_ID"], "value": msg}, ] return fields @@ -375,15 +360,22 @@ def test_create_data_row_with_invalid_input(dataset, image_url): dataset.create_data_row("asdf") -def test_create_data_row_with_metadata(mdo, dataset, image_url): +def test_create_data_row_with_metadata( + mdo, + dataset, + image_url, + make_metadata_fields, + constants, + make_metadata_fields_dict, +): client = dataset.client assert len(list(dataset.data_rows())) == 0 data_row = dataset.create_data_row( - row_data=image_url, metadata_fields=make_metadata_fields() + row_data=image_url, metadata_fields=make_metadata_fields ) - assert len(list(dataset.data_rows())) == 1 + assert len([dr for dr in dataset.data_rows()]) == 1 assert data_row.dataset() == dataset assert data_row.created_by() == client.get_user() assert data_row.organization() == client.get_organization() @@ -396,19 +388,21 @@ def test_create_data_row_with_metadata(mdo, dataset, image_url): metadata = data_row.metadata assert len(metadata_fields) == 3 assert len(metadata) == 3 - assert [ - m["schemaId"] for m in metadata_fields - ].sort() == EXPECTED_METADATA_SCHEMA_IDS + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() for m in metadata: assert mdo._parse_upsert(m) -def test_create_data_row_with_metadata_dict(mdo, dataset, image_url): +def test_create_data_row_with_metadata_dict( + mdo, dataset, image_url, constants, 
make_metadata_fields_dict +): client = dataset.client assert len(list(dataset.data_rows())) == 0 data_row = dataset.create_data_row( - row_data=image_url, metadata_fields=make_metadata_fields_dict() + row_data=image_url, metadata_fields=make_metadata_fields_dict ) assert len(list(dataset.data_rows())) == 1 @@ -424,25 +418,36 @@ def test_create_data_row_with_metadata_dict(mdo, dataset, image_url): metadata = data_row.metadata assert len(metadata_fields) == 3 assert len(metadata) == 3 - assert [ - m["schemaId"] for m in metadata_fields - ].sort() == EXPECTED_METADATA_SCHEMA_IDS + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() for m in metadata: assert mdo._parse_upsert(m) -def test_create_data_row_with_invalid_metadata(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_row_with_invalid_metadata( + dataset, image_url, constants, make_metadata_fields +): + fields = make_metadata_fields # make the payload invalid by providing the same schema id more than once fields.append( - DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value="some msg") + DataRowMetadataField( + schema_id=constants["TEXT_SCHEMA_ID"], value="some msg" + ) ) with pytest.raises(ResourceCreationError): dataset.create_data_row(row_data=image_url, metadata_fields=fields) -def test_create_data_rows_with_metadata(mdo, dataset, image_url): +def test_create_data_rows_with_metadata( + mdo, + dataset, + image_url, + constants, + make_metadata_fields, + make_metadata_fields_dict, +): client = dataset.client assert len(list(dataset.data_rows())) == 0 @@ -451,22 +456,22 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): { DataRow.row_data: image_url, DataRow.external_id: "row1", - DataRow.metadata_fields: make_metadata_fields(), + DataRow.metadata_fields: make_metadata_fields, }, { DataRow.row_data: image_url, DataRow.external_id: "row2", - "metadata_fields": make_metadata_fields(), + "metadata_fields": make_metadata_fields, }, { DataRow.row_data: image_url, DataRow.external_id: "row3", - DataRow.metadata_fields: make_metadata_fields_dict(), + DataRow.metadata_fields: make_metadata_fields_dict, }, { DataRow.row_data: image_url, DataRow.external_id: "row4", - "metadata_fields": make_metadata_fields_dict(), + "metadata_fields": make_metadata_fields_dict, }, ] ) @@ -488,9 +493,9 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): metadata = row.metadata assert len(metadata_fields) == 3 assert len(metadata) == 3 - assert [ - m["schemaId"] for m in metadata_fields - ].sort() == EXPECTED_METADATA_SCHEMA_IDS + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() for m in metadata: assert mdo._parse_upsert(m) @@ -507,14 +512,16 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): ], ) def test_create_data_rows_with_named_metadata_field_class( - test_function, metadata_obj_type, mdo, dataset, image_url + test_function, metadata_obj_type, mdo, dataset, image_url, constants ): row_with_metadata_field = { DataRow.row_data: image_url, DataRow.external_id: "row1", DataRow.metadata_fields: [ DataRowMetadataField(name="split", value="test"), - DataRowMetadataField(name=CUSTOM_TEXT_SCHEMA_NAME, value="hello"), + DataRowMetadataField( + name=constants["CUSTOM_TEXT_SCHEMA_NAME"], value="hello" + ), ], } @@ -523,7 +530,7 @@ def test_create_data_rows_with_named_metadata_field_class( DataRow.external_id: "row2", "metadata_fields": [ {"name": "split", "value": 
"test"}, - {"name": CUSTOM_TEXT_SCHEMA_NAME, "value": "hello"}, + {"name": constants["CUSTOM_TEXT_SCHEMA_NAME"], "value": "hello"}, ], } @@ -555,21 +562,26 @@ def create_data_row(data_rows): assert len(created_rows[0].metadata) == 2 metadata = created_rows[0].metadata - assert metadata[0].schema_id == SPLIT_SCHEMA_ID + assert metadata[0].schema_id == constants["SPLIT_SCHEMA_ID"] assert metadata[0].name == "test" assert metadata[0].value == mdo.reserved_by_name["split"]["test"].uid - assert metadata[1].name == CUSTOM_TEXT_SCHEMA_NAME + assert metadata[1].name == constants["CUSTOM_TEXT_SCHEMA_NAME"] assert metadata[1].value == "hello" assert ( - metadata[1].schema_id == mdo.custom_by_name[CUSTOM_TEXT_SCHEMA_NAME].uid + metadata[1].schema_id + == mdo.custom_by_name[constants["CUSTOM_TEXT_SCHEMA_NAME"]].uid ) -def test_create_data_rows_with_invalid_metadata(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_invalid_metadata( + dataset, image_url, constants, make_metadata_fields +): + fields = make_metadata_fields # make the payload invalid by providing the same schema id more than once fields.append( - DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value="some msg") + DataRowMetadataField( + schema_id=constants["TEXT_SCHEMA_ID"], value="some msg" + ) ) task = dataset.create_data_rows( @@ -580,13 +592,15 @@ def test_create_data_rows_with_invalid_metadata(dataset, image_url): assert task.status == "COMPLETE" assert len(task.failed_data_rows) == 1 assert ( - f"A schemaId can only be specified once per DataRow : [{TEXT_SCHEMA_ID}]" + f"A schemaId can only be specified once per DataRow : [{constants['TEXT_SCHEMA_ID']}]" in task.failed_data_rows[0]["message"] ) -def test_create_data_rows_with_metadata_missing_value(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_metadata_missing_value( + dataset, image_url, make_metadata_fields +): + fields = make_metadata_fields fields.append({"schemaId": "some schema id"}) with pytest.raises(ValueError) as exc: @@ -601,8 +615,10 @@ def test_create_data_rows_with_metadata_missing_value(dataset, image_url): ) -def test_create_data_rows_with_metadata_missing_schema_id(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_metadata_missing_schema_id( + dataset, image_url, make_metadata_fields +): + fields = make_metadata_fields fields.append({"value": "some value"}) with pytest.raises(ValueError) as exc: @@ -617,8 +633,10 @@ def test_create_data_rows_with_metadata_missing_schema_id(dataset, image_url): ) -def test_create_data_rows_with_metadata_wrong_type(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_metadata_wrong_type( + dataset, image_url, make_metadata_fields +): + fields = make_metadata_fields fields.append("Neither DataRowMetadataField or dict") with pytest.raises(ValueError) as exc: @@ -944,7 +962,11 @@ def test_does_not_update_not_provided_attachment_fields(data_row): assert attachment.attachment_type == "RAW_TEXT" -def test_create_data_rows_result(client, dataset, image_url): +def test_create_data_rows_result( + client, + dataset, + image_url, +): task = dataset.create_data_rows( [ { @@ -963,12 +985,14 @@ def test_create_data_rows_result(client, dataset, image_url): client.get_data_row(result["id"]) -def test_create_data_rows_local_file(dataset, sample_image): +def test_create_data_rows_local_file( + dataset, sample_image, make_metadata_fields +): task = dataset.create_data_rows( [ { DataRow.row_data: 
sample_image, - DataRow.metadata_fields: make_metadata_fields(), + DataRow.metadata_fields: make_metadata_fields, } ] ) diff --git a/libs/labelbox/tests/integration/test_mmc_data_rows.py b/libs/labelbox/tests/integration/test_mmc_data_rows.py new file mode 100644 index 000000000..ee457a7fe --- /dev/null +++ b/libs/labelbox/tests/integration/test_mmc_data_rows.py @@ -0,0 +1,58 @@ +import json +import random + +import pytest + + +@pytest.fixture +def mmc_data_row(dataset, make_metadata_fields, embedding): + row_data = { + "type": "application/vnd.labelbox.conversational.model-chat-evaluation", + "draft": True, + "rootMessageIds": ["root1"], + "actors": {}, + "messages": {}, + } + + vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] + embeddings = [{"embedding_id": embedding.id, "vector": vector}] + + content_all = { + "row_data": row_data, + "attachments": [{"type": "RAW_TEXT", "value": "attachment value"}], + "metadata_fields": make_metadata_fields, + "embeddings": embeddings, + } + task = dataset.create_data_rows([content_all]) + task.wait_till_done() + assert task.status == "COMPLETE" + + data_row = list(dataset.data_rows())[0] + + yield data_row + + data_row.delete() + + +def test_mmc(mmc_data_row, embedding, constants): + data_row = mmc_data_row + assert json.loads(data_row.row_data) == { + "type": "application/vnd.labelbox.conversational.model-chat-evaluation", + "draft": True, + "rootMessageIds": ["root1"], + "actors": {}, + "messages": {}, + } + + metadata_fields = data_row.metadata_fields + metadata = data_row.metadata + assert len(metadata_fields) == 3 + assert len(metadata) == 3 + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() + + attachments = list(data_row.attachments()) + assert len(attachments) == 1 + + assert embedding.get_imported_vector_count() == 1 From 994b6dac60e18b01221624202edf06021689a468 Mon Sep 17 00:00:00 2001 From: mnoszczak <99751601+mnoszczak@users.noreply.github.com> Date: Mon, 7 Oct 2024 18:49:11 +0200 Subject: [PATCH 13/22] [PLT-0] Add missing tests (#1855) --- libs/labelbox/tests/data/annotation_import/conftest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py index 2342a759a..001b96771 100644 --- a/libs/labelbox/tests/data/annotation_import/conftest.py +++ b/libs/labelbox/tests/data/annotation_import/conftest.py @@ -2399,6 +2399,10 @@ def expected_export_v2_document(): "height": 65.0, "width": 12.0, }, + "page_dimensions": { + "height": 792.0, + "width": 612.0, + }, }, ], "classifications": [ From 58a3f4c5aecf3456ed2c1bbb868c09b84ba692af Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 8 Oct 2024 12:08:09 -0700 Subject: [PATCH 14/22] [PLT-1614] Support data row / batch for live mmc projects (#1856) --- docs/labelbox/datarow_payload_templates.rst | 6 +++ libs/labelbox/src/labelbox/client.py | 45 ++++++++++++---- .../data/annotation_types/collection.py | 10 ++-- .../schema/data_row_payload_templates.py | 40 ++++++++++++++ libs/labelbox/src/labelbox/schema/project.py | 46 +++++++--------- libs/labelbox/tests/integration/conftest.py | 25 +++++++-- .../test_chat_evaluation_ontology_project.py | 34 +++--------- .../tests/integration/test_data_rows.py | 2 +- .../integration/test_labeling_service.py | 5 +- .../tests/integration/test_mmc_data_rows.py | 54 +++++++++++++------ .../integration/test_project_model_config.py | 15 +++--- 
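Distilled from the mmc_data_row fixture above, a minimal sketch of creating a model-chat-evaluation data row by hand; dataset is assumed to be an existing labelbox Dataset, and the payload mirrors the fixture:

row_data = {
    "type": "application/vnd.labelbox.conversational.model-chat-evaluation",
    "draft": True,
    "rootMessageIds": ["root1"],
    "actors": {},
    "messages": {},
}
task = dataset.create_data_rows([{"row_data": row_data}])
task.wait_till_done()
assert task.status == "COMPLETE"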
.../test_project_set_model_setup_complete.py | 8 +-- 12 files changed, 183 insertions(+), 107 deletions(-) create mode 100644 docs/labelbox/datarow_payload_templates.rst create mode 100644 libs/labelbox/src/labelbox/schema/data_row_payload_templates.py diff --git a/docs/labelbox/datarow_payload_templates.rst b/docs/labelbox/datarow_payload_templates.rst new file mode 100644 index 000000000..34dac6111 --- /dev/null +++ b/docs/labelbox/datarow_payload_templates.rst @@ -0,0 +1,6 @@ +Datarow payload templates +=============================================================================================== + +.. automodule:: labelbox.schema.data_row_payload_templates + :members: + :show-inheritance: \ No newline at end of file diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 02c93850e..d842b6d54 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -7,6 +7,7 @@ import sys import time import urllib.parse +import warnings from collections import defaultdict from datetime import datetime, timezone from types import MappingProxyType @@ -910,11 +911,21 @@ def create_model_evaluation_project( ) -> Project: pass + @overload def create_model_evaluation_project( self, dataset_id: Optional[str] = None, dataset_name: Optional[str] = None, - data_row_count: int = 100, + data_row_count: Optional[int] = None, + **kwargs, + ) -> Project: + pass + + def create_model_evaluation_project( + self, + dataset_id: Optional[str] = None, + dataset_name: Optional[str] = None, + data_row_count: Optional[int] = None, **kwargs, ) -> Project: """ @@ -940,26 +951,38 @@ def create_model_evaluation_project( >>> client.create_model_evaluation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10) >>> This creates a new project, and adds 100 datarows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created 10 data rows to the project. + >>> client.create_model_evaluation_project(name=project_name) + >>> This creates a new project with no data rows. """ - if not dataset_id and not dataset_name: - raise ValueError( - "dataset_name or data_set_id must be present and not be an empty string." - ) - if data_row_count <= 0: - raise ValueError("data_row_count must be a positive integer.") + autogenerate_data_rows = False + dataset_name_or_id = None + append_to_existing_dataset = None + + if dataset_id or dataset_name: + autogenerate_data_rows = True if dataset_id: append_to_existing_dataset = True dataset_name_or_id = dataset_id - else: + elif dataset_name: append_to_existing_dataset = False dataset_name_or_id = dataset_name + if autogenerate_data_rows: + kwargs["dataset_name_or_id"] = dataset_name_or_id + kwargs["append_to_existing_dataset"] = append_to_existing_dataset + if data_row_count is None: + data_row_count = 100 + if data_row_count < 0: + raise ValueError("data_row_count must be a positive integer.") + kwargs["data_row_count"] = data_row_count + warnings.warn( + "Automatic generation of data rows of live model evaluation projects is deprecated. 
dataset_name_or_id, append_to_existing_dataset, data_row_count will be removed in a future version.", + DeprecationWarning, + ) + kwargs["media_type"] = MediaType.Conversational - kwargs["dataset_name_or_id"] = dataset_name_or_id - kwargs["append_to_existing_dataset"] = append_to_existing_dataset - kwargs["data_row_count"] = data_row_count kwargs["editor_task_type"] = EditorTaskType.ModelChatEvaluation.value return self._create_project(**kwargs) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index d90204309..a636a3b3a 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -1,14 +1,10 @@ import logging -from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Callable, Generator, Iterable, Union, Optional -from uuid import uuid4 import warnings +from typing import Callable, Generator, Iterable, Union -from tqdm import tqdm - -from labelbox.schema import ontology from labelbox.orm.model import Entity -from ..ontology import get_classifications, get_tools +from labelbox.schema import ontology + from ..generator import PrefetchGenerator from .label import Label diff --git a/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py new file mode 100644 index 000000000..bf64e055f --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py @@ -0,0 +1,40 @@ +from typing import Dict, List + +from pydantic import BaseModel, Field + +from labelbox.schema.data_row import DataRowMetadataField + + +class ModelEvalutationTemlateRowData(BaseModel): + type: str = Field( + default="application/vnd.labelbox.conversational.model-chat-evaluation", + frozen=True, + ) + draft: bool = Field(default=True, frozen=True) + rootMessageIds: List[str] = Field(default=[]) + actors: Dict = Field(default={}) + version: int = Field(default=2, frozen=True) + messages: Dict = Field(default={}) + + +class ModelEvaluationTemplate(BaseModel): + """ + Use this class to create a model evaluation data row. + + Examples: + >>> data = ModelEvaluationTemplate() + >>> data.row_data.rootMessageIds = ["root1"] + >>> vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] + >>> data.embeddings = [...] + >>> data.metadata_fields = [...] + >>> data.attachments = [...] 
+ >>> content = data.model_dump() + >>> task = dataset.create_data_rows([content]) + """ + + row_data: ModelEvalutationTemlateRowData = Field( + default=ModelEvalutationTemlateRowData() + ) + attachments: List[Dict] = Field(default=[]) + embeddings: List[Dict] = Field(default=[]) + metadata_fields: List[DataRowMetadataField] = Field(default=[]) diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f8876f7c4..3d5f8ca92 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -1,11 +1,11 @@ import json import logging -from string import Template import time import warnings from collections import namedtuple from datetime import datetime, timezone from pathlib import Path +from string import Template from typing import ( TYPE_CHECKING, Any, @@ -14,28 +14,18 @@ List, Optional, Tuple, - TypeVar, Union, overload, ) from urllib.parse import urlparse -from labelbox.schema.labeling_service import ( - LabelingService, - LabelingServiceStatus, -) -from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -import requests - -from labelbox import parser from labelbox import utils -from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, - ResourceConflict, ResourceNotFoundError, + error_message_for_unparsed_graphql_error, ) from labelbox.orm import query from labelbox.orm.db_object import DbObject, Deletable, Updateable, experimental @@ -46,7 +36,6 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, - validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -54,22 +43,26 @@ from labelbox.schema.id_type import IdType from labelbox.schema.identifiable import DataRowIdentifier, GlobalKey, UniqueId from labelbox.schema.identifiables import DataRowIdentifiers, UniqueIds +from labelbox.schema.labeling_service import ( + LabelingService, + LabelingServiceStatus, +) +from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard from labelbox.schema.media_type import MediaType from labelbox.schema.model_config import ModelConfig -from labelbox.schema.project_model_config import ProjectModelConfig -from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.resource_tag import ResourceTag -from labelbox.schema.task import Task -from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, - OntologyKind, UploadType, ) +from labelbox.schema.project_model_config import ProjectModelConfig from labelbox.schema.project_overview import ( ProjectOverview, ProjectOverviewDetailed, ) +from labelbox.schema.queue_mode import QueueMode +from labelbox.schema.resource_tag import ResourceTag +from labelbox.schema.task import Task +from labelbox.schema.task_queue import TaskQueue if TYPE_CHECKING: from labelbox import BulkImportRequest @@ -579,7 +572,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - f"This function has only been tested to work with the Editor front end. Found %s", + "This function has only been tested to work with the Editor front end. 
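Expanding on the ModelEvaluationTemplate docstring above, a hedged end-to-end sketch; dataset and embedding are assumed fixtures/objects, and the embedding payload shape mirrors the tests elsewhere in this series:

import random

from labelbox.schema.data_row_payload_templates import ModelEvaluationTemplate

data = ModelEvaluationTemplate()
data.row_data.rootMessageIds = ["root1"]
data.attachments = [{"type": "RAW_TEXT", "value": "attachment value"}]
# Optional: attach a custom embedding vector of the right dimensionality.
vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)]
data.embeddings = [{"embedding_id": embedding.id, "vector": vector}]

task = dataset.create_data_rows([data.model_dump()])
task.wait_till_done()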
Found %s", frontend.name, ) @@ -788,7 +781,9 @@ def create_batch( if self.queue_mode != QueueMode.Batch: raise ValueError("Project must be in batch mode") - if self.is_auto_data_generation(): + if ( + self.is_auto_data_generation() and not self.is_chat_evaluation() + ): # NOTE live chat evaluation projects in sdk do not pre-generate data rows, but use batches like all other projects raise ValueError( "Cannot create batches for auto data generation projects" ) @@ -814,7 +809,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - f"Batch exceeds max size, break into smaller batches" + "Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1088,8 +1083,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Batch was not created successfully: " + json.dumps(task.errors) + "Batch was not created successfully: " + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) @@ -1436,7 +1430,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Priority was not updated successfully: " + "Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1629,7 +1623,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Data rows were not moved successfully: " + "Data rows were not moved successfully: " + json.dumps(task.errors) ) diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index 10b05681e..e73fef920 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -646,11 +646,28 @@ def chat_evaluation_ontology(client, rand_gen): @pytest.fixture -def live_chat_evaluation_project_with_new_dataset(client, rand_gen): +def live_chat_evaluation_project(client, rand_gen): project_name = f"test-model-evaluation-project-{rand_gen(str)}" - dataset_name = f"test-model-evaluation-dataset-{rand_gen(str)}" - project = client.create_model_evaluation_project( - name=project_name, dataset_name=dataset_name, data_row_count=1 + project = client.create_model_evaluation_project(name=project_name) + + yield project + + project.delete() + + +@pytest.fixture +def live_chat_evaluation_project_with_batch( + client, + rand_gen, + live_chat_evaluation_project, + offline_conversational_data_row, ): + project_name = f"test-model-evaluation-project-{rand_gen(str)}" + project = client.create_model_evaluation_project(name=project_name) + + project.create_batch( + rand_gen(str), + [offline_conversational_data_row.uid], # sample of data row objects ) yield project diff --git a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py index 47e39e2cf..2c02b77ac 100644 --- a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py +++ b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py @@ -1,15 +1,12 @@ import pytest -from unittest.mock import patch from labelbox import MediaType from labelbox.schema.ontology_kind import OntologyKind -from labelbox.exceptions import MalformedQueryException def test_create_chat_evaluation_ontology_project( - client, chat_evaluation_ontology, - 
live_chat_evaluation_project_with_new_dataset, + live_chat_evaluation_project, offline_conversational_data_row, rand_gen, ): @@ -28,7 +25,7 @@ def test_create_chat_evaluation_ontology_project( assert classification.schema_id assert classification.feature_schema_id - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project assert project.model_setup_complete is None project.connect_ontology(ontology) @@ -36,28 +33,11 @@ def test_create_chat_evaluation_ontology_project( assert project.labeling_frontend().name == "Editor" assert project.ontology().name == ontology.name - with pytest.raises( - ValueError, - match="Cannot create batches for auto data generation projects", - ): - project.create_batch( - rand_gen(str), - [offline_conversational_data_row.uid], # sample of data row objects - ) - - with pytest.raises( - ValueError, - match="Cannot create batches for auto data generation projects", - ): - with patch( - "labelbox.schema.project.MAX_SYNC_BATCH_ROW_COUNT", new=0 - ): # force to async - project.create_batch( - rand_gen(str), - [ - offline_conversational_data_row.uid - ], # sample of data row objects - ) + batch = project.create_batch( + rand_gen(str), + [offline_conversational_data_row.uid], # sample of data row objects + ) + assert batch def test_create_chat_evaluation_ontology_project_existing_dataset( diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index 9f0429269..baa65db69 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -405,7 +405,7 @@ def test_create_data_row_with_metadata_dict( row_data=image_url, metadata_fields=make_metadata_fields_dict ) - assert len(list(dataset.data_rows())) == 1 + assert len([dr for dr in dataset.data_rows()]) == 1 assert data_row.dataset() == dataset assert data_row.created_by() == client.get_user() assert data_row.organization() == client.get_organization() diff --git a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py index 04a1cb507..03a5694a7 100644 --- a/libs/labelbox/tests/integration/test_labeling_service.py +++ b/libs/labelbox/tests/integration/test_labeling_service.py @@ -42,12 +42,11 @@ def test_request_labeling_service_moe_offline_project( def test_request_labeling_service_moe_project( - rand_gen, - live_chat_evaluation_project_with_new_dataset, + live_chat_evaluation_project_with_batch, chat_evaluation_ontology, model_config, ): - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project_with_batch project.connect_ontology(chat_evaluation_ontology) project.upsert_instructions("tests/integration/media/sample_pdf.pdf") diff --git a/libs/labelbox/tests/integration/test_mmc_data_rows.py b/libs/labelbox/tests/integration/test_mmc_data_rows.py index ee457a7fe..3b4f95530 100644 --- a/libs/labelbox/tests/integration/test_mmc_data_rows.py +++ b/libs/labelbox/tests/integration/test_mmc_data_rows.py @@ -3,26 +3,35 @@ import pytest +from labelbox.schema.data_row_payload_templates import ModelEvaluationTemplate + @pytest.fixture -def mmc_data_row(dataset, make_metadata_fields, embedding): - row_data = { - "type": "application/vnd.labelbox.conversational.model-chat-evaluation", - "draft": True, - "rootMessageIds": ["root1"], - "actors": {}, - "messages": {}, - } +def mmc_data_row(dataset): + data = ModelEvaluationTemplate() + + content_all = data.model_dump() + task = 
dataset.create_data_rows([content_all]) + task.wait_till_done() + assert task.status == "COMPLETE" + + data_row = list(dataset.data_rows())[0] + + yield data_row + + data_row.delete() + +@pytest.fixture +def mmc_data_row_all(dataset, make_metadata_fields, embedding): + data = ModelEvaluationTemplate() + data.row_data.rootMessageIds = ["root1"] vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] - embeddings = [{"embedding_id": embedding.id, "vector": vector}] + data.embeddings = [{"embedding_id": embedding.id, "vector": vector}] + data.metadata_fields = make_metadata_fields + data.attachments = [{"type": "RAW_TEXT", "value": "attachment value"}] - content_all = { - "row_data": row_data, - "attachments": [{"type": "RAW_TEXT", "value": "attachment value"}], - "metadata_fields": make_metadata_fields, - "embeddings": embeddings, - } + content_all = data.model_dump() task = dataset.create_data_rows([content_all]) task.wait_till_done() assert task.status == "COMPLETE" @@ -34,14 +43,27 @@ def mmc_data_row(dataset, make_metadata_fields, embedding): data_row.delete() -def test_mmc(mmc_data_row, embedding, constants): +def test_mmc(mmc_data_row): data_row = mmc_data_row + assert json.loads(data_row.row_data) == { + "type": "application/vnd.labelbox.conversational.model-chat-evaluation", + "draft": True, + "rootMessageIds": [], + "actors": {}, + "messages": {}, + "version": 2, + } + + +def test_mmc_all(mmc_data_row_all, embedding, constants): + data_row = mmc_data_row_all assert json.loads(data_row.row_data) == { "type": "application/vnd.labelbox.conversational.model-chat-evaluation", "draft": True, "rootMessageIds": ["root1"], "actors": {}, "messages": {}, + "version": 2, } metadata_fields = data_row.metadata_fields diff --git a/libs/labelbox/tests/integration/test_project_model_config.py b/libs/labelbox/tests/integration/test_project_model_config.py index 2d783f62b..975a39afe 100644 --- a/libs/labelbox/tests/integration/test_project_model_config.py +++ b/libs/labelbox/tests/integration/test_project_model_config.py @@ -1,11 +1,10 @@ import pytest + from labelbox.exceptions import ResourceNotFoundError -def test_add_single_model_config( - live_chat_evaluation_project_with_new_dataset, model_config -): - configured_project = live_chat_evaluation_project_with_new_dataset +def test_add_single_model_config(live_chat_evaluation_project, model_config): + configured_project = live_chat_evaluation_project project_model_config_id = configured_project.add_model_config( model_config.uid ) @@ -22,11 +21,11 @@ def test_add_single_model_config( def test_add_multiple_model_config( client, rand_gen, - live_chat_evaluation_project_with_new_dataset, + live_chat_evaluation_project, model_config, valid_model_id, ): - configured_project = live_chat_evaluation_project_with_new_dataset + configured_project = live_chat_evaluation_project second_model_config = client.create_model_config( rand_gen(str), valid_model_id, {"param": "value"} ) @@ -52,9 +51,9 @@ def test_add_multiple_model_config( def test_delete_project_model_config( - live_chat_evaluation_project_with_new_dataset, model_config + live_chat_evaluation_project, model_config ): - configured_project = live_chat_evaluation_project_with_new_dataset + configured_project = live_chat_evaluation_project assert configured_project.delete_project_model_config( configured_project.add_model_config(model_config.uid) ) diff --git a/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py 
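The model-config tests above reduce to this round-trip, sketched here with assumed fixture values (live_chat_evaluation_project and model_config):

project = live_chat_evaluation_project
project_model_config_id = project.add_model_config(model_config.uid)
# Deleting by the returned id should succeed exactly once.
assert project.delete_project_model_config(project_model_config_id)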
b/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py index 1c3e68c9a..16a124945 100644 --- a/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py +++ b/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py @@ -4,9 +4,9 @@ def test_live_chat_evaluation_project( - live_chat_evaluation_project_with_new_dataset, model_config + live_chat_evaluation_project, model_config ): - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project project.set_project_model_setup_complete() assert bool(project.model_setup_complete) is True @@ -19,9 +19,9 @@ def test_live_chat_evaluation_project( def test_live_chat_evaluation_project_delete_cofig( - live_chat_evaluation_project_with_new_dataset, model_config + live_chat_evaluation_project, model_config ): - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project project_model_config_id = project.add_model_config(model_config.uid) assert project_model_config_id From b58e9023ea802e5b1638543932aa6bf896940363 Mon Sep 17 00:00:00 2001 From: sfendell-labelbox <150080555+sfendell-labelbox@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:32:41 -0700 Subject: [PATCH 15/22] Simplify some code. (#1857) --- libs/labelbox/src/labelbox/client.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index d842b6d54..671f8b8cc 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -955,21 +955,10 @@ def create_model_evaluation_project( >>> This creates a new project with no data rows. """ - autogenerate_data_rows = False - dataset_name_or_id = None - append_to_existing_dataset = None + dataset_name_or_id = dataset_id or dataset_name + append_to_existing_dataset = bool(dataset_id) - if dataset_id or dataset_name: - autogenerate_data_rows = True - - if dataset_id: - append_to_existing_dataset = True - dataset_name_or_id = dataset_id - elif dataset_name: - append_to_existing_dataset = False - dataset_name_or_id = dataset_name - - if autogenerate_data_rows: + if dataset_name_or_id: kwargs["dataset_name_or_id"] = dataset_name_or_id kwargs["append_to_existing_dataset"] = append_to_existing_dataset if data_row_count is None: From ceafd639f387fe2b071095e941198b0982255f4b Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 9 Oct 2024 09:00:29 -0700 Subject: [PATCH 16/22] Address QA feedback (#1859) --- .../src/labelbox/schema/data_row_payload_templates.py | 9 +++++---- libs/labelbox/tests/integration/test_mmc_data_rows.py | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py index bf64e055f..2e2728daa 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py +++ b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py @@ -1,11 +1,11 @@ -from typing import Dict, List +from typing import Dict, List, Optional from pydantic import BaseModel, Field from labelbox.schema.data_row import DataRowMetadataField -class ModelEvalutationTemlateRowData(BaseModel): +class ModelEvalutationTemplateRowData(BaseModel): type: str = Field( default="application/vnd.labelbox.conversational.model-chat-evaluation", frozen=True, @@ -15,6 +15,7 @@ class ModelEvalutationTemlateRowData(BaseModel): actors: Dict = Field(default={}) version: int = 
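The simplification in patch 15 above collapses the dataset branching into two expressions; a self-contained restatement of the same logic, using an illustrative helper name that is not part of the SDK:

def resolve_dataset_args(dataset_id=None, dataset_name=None):
    # dataset_id wins when both are given; append only when an existing
    # dataset id was supplied.
    dataset_name_or_id = dataset_id or dataset_name
    append_to_existing_dataset = bool(dataset_id)
    return dataset_name_or_id, append_to_existing_dataset

assert resolve_dataset_args(dataset_id="clr00u8j0j0j0") == ("clr00u8j0j0j0", True)
assert resolve_dataset_args(dataset_name="my-dataset") == ("my-dataset", False)
assert resolve_dataset_args() == (None, False)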
Field(default=2, frozen=True) messages: Dict = Field(default={}) + global_key: Optional[str] = None class ModelEvaluationTemplate(BaseModel): @@ -32,8 +33,8 @@ class ModelEvaluationTemplate(BaseModel): >>> task = dataset.create_data_rows([content]) """ - row_data: ModelEvalutationTemlateRowData = Field( - default=ModelEvalutationTemlateRowData() + row_data: ModelEvalutationTemplateRowData = Field( + default=ModelEvalutationTemplateRowData() ) attachments: List[Dict] = Field(default=[]) embeddings: List[Dict] = Field(default=[]) diff --git a/libs/labelbox/tests/integration/test_mmc_data_rows.py b/libs/labelbox/tests/integration/test_mmc_data_rows.py index 3b4f95530..2fa7bdd1b 100644 --- a/libs/labelbox/tests/integration/test_mmc_data_rows.py +++ b/libs/labelbox/tests/integration/test_mmc_data_rows.py @@ -10,7 +10,7 @@ def mmc_data_row(dataset): data = ModelEvaluationTemplate() - content_all = data.model_dump() + content_all = data.model_dump(exclude_none=True) task = dataset.create_data_rows([content_all]) task.wait_till_done() assert task.status == "COMPLETE" @@ -26,12 +26,13 @@ def mmc_data_row(dataset): def mmc_data_row_all(dataset, make_metadata_fields, embedding): data = ModelEvaluationTemplate() data.row_data.rootMessageIds = ["root1"] + data.row_data.global_key = "global_key" vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] data.embeddings = [{"embedding_id": embedding.id, "vector": vector}] data.metadata_fields = make_metadata_fields data.attachments = [{"type": "RAW_TEXT", "value": "attachment value"}] - content_all = data.model_dump() + content_all = data.model_dump(exclude_none=True) task = dataset.create_data_rows([content_all]) task.wait_till_done() assert task.status == "COMPLETE" @@ -64,6 +65,7 @@ def test_mmc_all(mmc_data_row_all, embedding, constants): "actors": {}, "messages": {}, "version": 2, + "globalKey": "global_key", } metadata_fields = data_row.metadata_fields From b0127cf9a59cf2e59bd2506ba48b26efb25866a9 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 9 Oct 2024 10:14:27 -0700 Subject: [PATCH 17/22] Release v.5.2.0 prep (#1860) --- docs/conf.py | 2 +- libs/labelbox/CHANGELOG.md | 6 +- libs/labelbox/pyproject.toml | 2 +- libs/labelbox/src/labelbox/__init__.py | 96 +++++++++++++------------- 4 files changed, 55 insertions(+), 51 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 51648857e..5f1d50567 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ project = 'Python SDK reference' copyright = '2024, Labelbox' author = 'Labelbox' -release = '5.1.0' +release = '5.2.0' # -- General configuration --------------------------------------------------- diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md index 6b23cf6bc..781e90ba4 100644 --- a/libs/labelbox/CHANGELOG.md +++ b/libs/labelbox/CHANGELOG.md @@ -1,6 +1,10 @@ # Changelog -# Version 5.1.0 (2024-09-27) +# Version 5.2.0 (2024-10-09) ## Fixed +* Support data row / batch for live mmc projects([#1856](https://github.com/Labelbox/labelbox-python/pull/1856)) + +# Version 5.1.0 (2024-09-27) +## Added * Support self-signed SSL certs([#1811](https://github.com/Labelbox/labelbox-python/pull/1811)) * Rectangle units now correctly support percent inputs([#1848](https://github.com/Labelbox/labelbox-python/pull/1848)) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index bc13a34d7..7b65e5bef 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "labelbox" -version = "5.1.0" +version = 
"5.2.0" description = "Labelbox Python API" authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }] dependencies = [ diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 981520719..f5f6cd017 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -1,79 +1,79 @@ name = "labelbox" -__version__ = "5.1.0" +__version__ = "5.2.0" from labelbox.client import Client -from labelbox.schema.project import Project -from labelbox.schema.model import Model -from labelbox.schema.model_config import ModelConfig -from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( + LabelImport, MALPredictionImport, MEAPredictionImport, - LabelImport, MEAToMALPredictionImport, ) -from labelbox.schema.dataset import Dataset -from labelbox.schema.data_row import DataRow +from labelbox.schema.asset_attachment import AssetAttachment +from labelbox.schema.batch import Batch +from labelbox.schema.benchmark import Benchmark +from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.catalog import Catalog +from labelbox.schema.data_row import DataRow +from labelbox.schema.data_row_metadata import ( + DataRowMetadata, + DataRowMetadataField, + DataRowMetadataOntology, + DeleteDataRowMetadata, +) +from labelbox.schema.dataset import Dataset from labelbox.schema.enums import AnnotationImportState -from labelbox.schema.label import Label -from labelbox.schema.batch import Batch -from labelbox.schema.review import Review -from labelbox.schema.user import User -from labelbox.schema.organization import Organization -from labelbox.schema.task import Task from labelbox.schema.export_task import ( - StreamType, + BufferedJsonConverterOutput, ExportTask, - JsonConverter, - JsonConverterOutput, FileConverter, FileConverterOutput, - BufferedJsonConverterOutput, + JsonConverter, + JsonConverterOutput, + StreamType, ) +from labelbox.schema.iam_integration import IAMIntegration +from labelbox.schema.identifiable import GlobalKey, UniqueId +from labelbox.schema.identifiables import DataRowIds, GlobalKeys, UniqueIds +from labelbox.schema.invite import Invite, InviteLimit +from labelbox.schema.label import Label +from labelbox.schema.label_score import LabelScore from labelbox.schema.labeling_frontend import ( LabelingFrontend, LabelingFrontendOptions, ) -from labelbox.schema.asset_attachment import AssetAttachment -from labelbox.schema.webhook import Webhook +from labelbox.schema.labeling_service import LabelingService +from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard +from labelbox.schema.labeling_service_status import LabelingServiceStatus +from labelbox.schema.media_type import MediaType +from labelbox.schema.model import Model +from labelbox.schema.model_config import ModelConfig +from labelbox.schema.model_run import DataSplit, ModelRun from labelbox.schema.ontology import ( + Classification, + FeatureSchema, Ontology, OntologyBuilder, - Classification, Option, + PromptResponseClassification, + ResponseOption, Tool, - FeatureSchema, ) -from labelbox.schema.ontology import PromptResponseClassification -from labelbox.schema.ontology import ResponseOption -from labelbox.schema.role import Role, ProjectRole -from labelbox.schema.invite import Invite, InviteLimit -from labelbox.schema.data_row_metadata import ( - DataRowMetadataOntology, - DataRowMetadataField, - DataRowMetadata, - DeleteDataRowMetadata, -) -from 
labelbox.schema.model_run import ModelRun, DataSplit -from labelbox.schema.benchmark import Benchmark -from labelbox.schema.iam_integration import IAMIntegration -from labelbox.schema.resource_tag import ResourceTag -from labelbox.schema.project_model_config import ProjectModelConfig -from labelbox.schema.project_resource_tag import ProjectResourceTag -from labelbox.schema.media_type import MediaType -from labelbox.schema.slice import Slice, CatalogSlice, ModelSlice -from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.task_queue import TaskQueue -from labelbox.schema.label_score import LabelScore -from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds -from labelbox.schema.identifiable import UniqueId, GlobalKey from labelbox.schema.ontology_kind import OntologyKind +from labelbox.schema.organization import Organization +from labelbox.schema.project import Project +from labelbox.schema.project_model_config import ProjectModelConfig from labelbox.schema.project_overview import ( ProjectOverview, ProjectOverviewDetailed, ) -from labelbox.schema.labeling_service import LabelingService -from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -from labelbox.schema.labeling_service_status import LabelingServiceStatus +from labelbox.schema.project_resource_tag import ProjectResourceTag +from labelbox.schema.queue_mode import QueueMode +from labelbox.schema.resource_tag import ResourceTag +from labelbox.schema.review import Review +from labelbox.schema.role import ProjectRole, Role +from labelbox.schema.slice import CatalogSlice, ModelSlice, Slice +from labelbox.schema.task import Task +from labelbox.schema.task_queue import TaskQueue +from labelbox.schema.user import User +from labelbox.schema.webhook import Webhook From 9a127094222916b8981740a050dd72a69e2c4cc0 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 9 Oct 2024 17:33:11 -0700 Subject: [PATCH 18/22] Fix encoding for exporter (#1862) --- libs/labelbox/src/labelbox/schema/export_task.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/export_task.py b/libs/labelbox/src/labelbox/schema/export_task.py index a144f4c76..76fd8a739 100644 --- a/libs/labelbox/src/labelbox/schema/export_task.py +++ b/libs/labelbox/src/labelbox/schema/export_task.py @@ -1,11 +1,16 @@ +import json +import os +import tempfile +import warnings from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum from functools import lru_cache from io import TextIOWrapper -import json from pathlib import Path from typing import ( + TYPE_CHECKING, + Any, Callable, Generic, Iterator, @@ -14,19 +19,14 @@ Tuple, TypeVar, Union, - TYPE_CHECKING, overload, - Any, ) import requests -import warnings -import tempfile -import os +from pydantic import BaseModel from labelbox.schema.task import Task from labelbox.utils import _CamelCaseMixin -from pydantic import BaseModel, Field, AliasChoices if TYPE_CHECKING: from labelbox import Client @@ -241,6 +241,7 @@ def _get_file_content( ) response = requests.get(file_info.file, timeout=30) response.raise_for_status() + response.encoding = "utf-8" assert ( len(response.content) == file_info.offsets.end - file_info.offsets.start + 1 From a75c402c2a6854b59ebab61d4950db5cb1202db7 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 9 Oct 2024 17:36:50 -0700 Subject: [PATCH 19/22] Release v.5.2.1 prep (#1863) --- docs/conf.py | 2 +- libs/labelbox/CHANGELOG.md | 6 +++++- 
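Why the one-line encoding fix in patch 18 matters, as a hedged reading sketch: without response.encoding = "utf-8", requests may guess a different charset and mangle non-ASCII label text. Here export_task is assumed to be a completed ExportTask, and the buffered-stream accessor is an assumption based on the types this package exports:

for output in export_task.get_buffered_stream():
    row = output.json  # parsed JSON payload for a single data row
    print(row["data_row"]["id"])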
libs/labelbox/pyproject.toml | 2 +- libs/labelbox/src/labelbox/__init__.py | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 5f1d50567..07656e3a0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ project = 'Python SDK reference' copyright = '2024, Labelbox' author = 'Labelbox' -release = '5.2.0' +release = '5.2.1' # -- General configuration --------------------------------------------------- diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md index 781e90ba4..c6a21580c 100644 --- a/libs/labelbox/CHANGELOG.md +++ b/libs/labelbox/CHANGELOG.md @@ -1,6 +1,10 @@ # Changelog -# Version 5.2.0 (2024-10-09) +# Version 5.2.1 (2024-10-09) ## Fixed +* Exporter encoding + +# Version 5.2.0 (2024-10-09) +## Added * Support data row / batch for live mmc projects([#1856](https://github.com/Labelbox/labelbox-python/pull/1856)) # Version 5.1.0 (2024-09-27) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index 7b65e5bef..ee2f9b859 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "labelbox" -version = "5.2.0" +version = "5.2.1" description = "Labelbox Python API" authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }] dependencies = [ diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index f5f6cd017..b0f4ebe11 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -1,6 +1,6 @@ name = "labelbox" -__version__ = "5.2.0" +__version__ = "5.2.1" from labelbox.client import Client from labelbox.schema.annotation_import import ( From 0560dc95beccf1e6100ce1864c8f7994b041c708 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Thu, 10 Oct 2024 16:29:40 -0700 Subject: [PATCH 20/22] Fix create_model_evaluation_project --- libs/labelbox/src/labelbox/client.py | 16 ---------------- libs/labelbox/src/labelbox/project_validation.py | 3 +++ .../tests/integration/test_labeling_service.py | 5 ++--- 3 files changed, 5 insertions(+), 19 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 9014c2613..bcf29665e 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -638,7 +638,6 @@ def create_project( } return self._create_project(_CoreProjectInput(**input)) - @overload def create_model_evaluation_project( self, name: str, @@ -652,16 +651,6 @@ def create_model_evaluation_project( dataset_id: Optional[str] = None, dataset_name: Optional[str] = None, data_row_count: Optional[int] = None, - **kwargs, - ) -> Project: - pass - - def create_model_evaluation_project( - self, - dataset_id: Optional[str] = None, - dataset_name: Optional[str] = None, - data_row_count: Optional[int] = None, - **kwargs, ) -> Project: """ Use this method exclusively to create a chat model evaluation project. @@ -694,13 +683,8 @@ def create_model_evaluation_project( append_to_existing_dataset = bool(dataset_id) if dataset_name_or_id: - kwargs["dataset_name_or_id"] = dataset_name_or_id - kwargs["append_to_existing_dataset"] = append_to_existing_dataset if data_row_count is None: data_row_count = 100 - if data_row_count < 0: - raise ValueError("data_row_count must be a positive integer.") - kwargs["data_row_count"] = data_row_count warnings.warn( "Automatic generation of data rows of live model evaluation projects is deprecated. 
From 0560dc95beccf1e6100ce1864c8f7994b041c708 Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Thu, 10 Oct 2024 16:29:40 -0700
Subject: [PATCH 20/22] Fix create_model_evaluation_project

---
 libs/labelbox/src/labelbox/client.py             | 16 ----------------
 libs/labelbox/src/labelbox/project_validation.py |  3 +++
 .../tests/integration/test_labeling_service.py   |  5 ++---
 3 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py
index 9014c2613..bcf29665e 100644
--- a/libs/labelbox/src/labelbox/client.py
+++ b/libs/labelbox/src/labelbox/client.py
@@ -638,7 +638,6 @@ def create_project(
         }
         return self._create_project(_CoreProjectInput(**input))

-    @overload
     def create_model_evaluation_project(
         self,
         name: str,
@@ -652,16 +651,6 @@ def create_model_evaluation_project(
         dataset_id: Optional[str] = None,
         dataset_name: Optional[str] = None,
         data_row_count: Optional[int] = None,
-        **kwargs,
-    ) -> Project:
-        pass
-
-    def create_model_evaluation_project(
-        self,
-        dataset_id: Optional[str] = None,
-        dataset_name: Optional[str] = None,
-        data_row_count: Optional[int] = None,
-        **kwargs,
     ) -> Project:
         """
         Use this method exclusively to create a chat model evaluation project.
@@ -694,13 +683,8 @@ def create_model_evaluation_project(
         append_to_existing_dataset = bool(dataset_id)

         if dataset_name_or_id:
-            kwargs["dataset_name_or_id"] = dataset_name_or_id
-            kwargs["append_to_existing_dataset"] = append_to_existing_dataset
         if data_row_count is None:
             data_row_count = 100
-        if data_row_count < 0:
-            raise ValueError("data_row_count must be a positive integer.")
-        kwargs["data_row_count"] = data_row_count
         warnings.warn(
             "Automatic generation of data rows of live model evaluation projects is deprecated. dataset_name_or_id, append_to_existing_dataset, data_row_count will be removed in a future version.",
             DeprecationWarning,
diff --git a/libs/labelbox/src/labelbox/project_validation.py b/libs/labelbox/src/labelbox/project_validation.py
index 41f1fa762..2a6db9e2a 100644
--- a/libs/labelbox/src/labelbox/project_validation.py
+++ b/libs/labelbox/src/labelbox/project_validation.py
@@ -69,6 +69,9 @@ def validate_fields(self):
             is_consensus_enabled=True,
         )

+        if self.data_row_count is not None and self.data_row_count < 0:
+            raise ValueError("data_row_count must be a positive integer.")
+
         return self

     def _set_quality_mode_attributes(
diff --git a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py
index fc604ac8a..e8b3d4cdc 100644
--- a/libs/labelbox/tests/integration/test_labeling_service.py
+++ b/libs/labelbox/tests/integration/test_labeling_service.py
@@ -1,7 +1,6 @@
 import pytest
 from lbox.exceptions import (
     LabelboxError,
-    MalformedQueryException,
     ResourceNotFoundError,
 )

@@ -54,7 +53,7 @@ def test_request_labeling_service_moe_project(
     labeling_service = project.get_labeling_service()

     with pytest.raises(
-        MalformedQueryException,
+        LabelboxError,
         match='[{"errorType":"PROJECT_MODEL_CONFIG","errorMessage":"Project model config is not completed"}]',
     ):
         labeling_service.request()
@@ -76,5 +75,5 @@ def test_request_labeling_service_incomplete_requirements(ontology, project):
     ):  # No labeling service by default
         labeling_service.request()
     project.connect_ontology(ontology)
-    with pytest.raises(MalformedQueryException):
+    with pytest.raises(LabelboxError):
         labeling_service.request()
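The patch above moves the `data_row_count` range check out of `create_model_evaluation_project` and into the model-level validation in project_validation.py, so every caller that builds a project input gets the same check. A simplified sketch of that pattern, assuming pydantic v2 and using an illustrative stand-in for `_CoreProjectInput`:

    from typing import Optional

    from pydantic import BaseModel, model_validator

    class ProjectInput(BaseModel):
        name: str
        data_row_count: Optional[int] = None

        @model_validator(mode="after")
        def validate_fields(self):
            # Centralizing the check means invalid input fails at model
            # construction time, not deep inside a client method.
            if self.data_row_count is not None and self.data_row_count < 0:
                raise ValueError("data_row_count must be a positive integer.")
            return self

    ProjectInput(name="demo", data_row_count=-1)  # raises pydantic.ValidationError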
From 4f7b6bb4968d393b54baa6c5796992e2e3677a49 Mon Sep 17 00:00:00 2001
From: Gabefire <33893811+Gabefire@users.noreply.github.com>
Date: Thu, 10 Oct 2024 17:47:50 -0500
Subject: [PATCH 21/22] fixed export typos

---
 libs/labelbox/src/labelbox/schema/catalog.py   | 2 +-
 libs/labelbox/src/labelbox/schema/data_row.py  | 2 +-
 libs/labelbox/src/labelbox/schema/dataset.py   | 2 +-
 libs/labelbox/src/labelbox/schema/model_run.py | 2 +-
 libs/labelbox/src/labelbox/schema/project.py   | 2 +-
 libs/labelbox/src/labelbox/schema/slice.py     | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libs/labelbox/src/labelbox/schema/catalog.py b/libs/labelbox/src/labelbox/schema/catalog.py
index df50503ad..8d9646779 100644
--- a/libs/labelbox/src/labelbox/schema/catalog.py
+++ b/libs/labelbox/src/labelbox/schema/catalog.py
@@ -48,7 +48,7 @@ def export_v2(
         """
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )

diff --git a/libs/labelbox/src/labelbox/schema/data_row.py b/libs/labelbox/src/labelbox/schema/data_row.py
index ed1184c12..cb0e99b22 100644
--- a/libs/labelbox/src/labelbox/schema/data_row.py
+++ b/libs/labelbox/src/labelbox/schema/data_row.py
@@ -280,7 +280,7 @@ def export_v2(
         """
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )

diff --git a/libs/labelbox/src/labelbox/schema/dataset.py b/libs/labelbox/src/labelbox/schema/dataset.py
index df467caf1..107f3f50b 100644
--- a/libs/labelbox/src/labelbox/schema/dataset.py
+++ b/libs/labelbox/src/labelbox/schema/dataset.py
@@ -360,7 +360,7 @@ def export_v2(
         """
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )

diff --git a/libs/labelbox/src/labelbox/schema/model_run.py b/libs/labelbox/src/labelbox/schema/model_run.py
index 053c43b97..dcdbdf0e8 100644
--- a/libs/labelbox/src/labelbox/schema/model_run.py
+++ b/libs/labelbox/src/labelbox/schema/model_run.py
@@ -541,7 +541,7 @@ def export_v2(
         """
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )

diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py
index 771ca3785..a6f2dfe28 100644
--- a/libs/labelbox/src/labelbox/schema/project.py
+++ b/libs/labelbox/src/labelbox/schema/project.py
@@ -417,7 +417,7 @@ def export_v2(
         """
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )

diff --git a/libs/labelbox/src/labelbox/schema/slice.py b/libs/labelbox/src/labelbox/schema/slice.py
index 3bad8cf07..a640ebc1d 100644
--- a/libs/labelbox/src/labelbox/schema/slice.py
+++ b/libs/labelbox/src/labelbox/schema/slice.py
@@ -129,7 +129,7 @@ def export_v2(
         """
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )
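All six warnings corrected above follow the same pattern: `warnings.warn` with `DeprecationWarning` and `stacklevel=2`, which attributes the warning to the caller of `export_v2` instead of the SDK frame that raised it. An illustrative standalone example, not SDK code:

    import warnings

    def export_v2():
        warnings.warn(
            "export_v2 will be removed in 7.0. Please refer to our docs for export alternatives.",
            DeprecationWarning,
            stacklevel=2,
        )

    warnings.simplefilter("always", DeprecationWarning)
    export_v2()  # the warning points at this call site because of stacklevel=2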
From dcfb0298294f217f787d44f02c0f9669b969afde Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Thu, 10 Oct 2024 16:48:44 -0700
Subject: [PATCH 22/22] Update metadata to indicate 3.13 not supported yet

---
 libs/labelbox/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml
index 06a75ec19..f58dba890 100644
--- a/libs/labelbox/pyproject.toml
+++ b/libs/labelbox/pyproject.toml
@@ -15,7 +15,7 @@ dependencies = [
     "lbox-clients==1.1.0",
 ]
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.9,<3.13"
 classifiers = [
     # How mature is this project?
     "Development Status :: 5 - Production/Stable",
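A note on this final change: `requires-python` is published as the Requires-Python field in the package metadata, so pip on a Python 3.13 interpreter should refuse to resolve this release rather than install it and fail at runtime. A small sketch of how the new bound behaves, using the `packaging` library:

    from packaging.specifiers import SpecifierSet

    # The bound added above.
    spec = SpecifierSet(">=3.9,<3.13")

    print("3.12" in spec)  # True: still within the supported range
    print("3.13" in spec)  # False: excluded until support lands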