diff --git a/docs/conf.py b/docs/conf.py index 51648857e..07656e3a0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ project = 'Python SDK reference' copyright = '2024, Labelbox' author = 'Labelbox' -release = '5.1.0' +release = '5.2.1' # -- General configuration --------------------------------------------------- diff --git a/docs/labelbox/datarow_payload_templates.rst b/docs/labelbox/datarow_payload_templates.rst new file mode 100644 index 000000000..34dac6111 --- /dev/null +++ b/docs/labelbox/datarow_payload_templates.rst @@ -0,0 +1,6 @@ +Datarow payload templates +=============================================================================================== + +.. automodule:: labelbox.schema.data_row_payload_templates + :members: + :show-inheritance: \ No newline at end of file diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md index 6b23cf6bc..c6a21580c 100644 --- a/libs/labelbox/CHANGELOG.md +++ b/libs/labelbox/CHANGELOG.md @@ -1,6 +1,14 @@ # Changelog -# Version 5.1.0 (2024-09-27) +# Version 5.2.1 (2024-10-09) ## Fixed +* Exporter encoding + +# Version 5.2.0 (2024-10-09) +## Added +* Support data row / batch for live mmc projects([#1856](https://github.com/Labelbox/labelbox-python/pull/1856)) + +# Version 5.1.0 (2024-09-27) +## Added * Support self-signed SSL certs([#1811](https://github.com/Labelbox/labelbox-python/pull/1811)) * Rectangle units now correctly support percent inputs([#1848](https://github.com/Labelbox/labelbox-python/pull/1848)) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index fa64c74f6..06a75ec19 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "labelbox" -version = "5.1.0" +version = "5.2.1" description = "Labelbox Python API" authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }] dependencies = [ diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 
850aec0be..9da8edc17 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -1,45 +1,59 @@ name = "labelbox" -__version__ = "5.1.0" +__version__ = "5.2.1" from labelbox.client import Client -from labelbox.schema.project import Project -from labelbox.schema.model import Model -from labelbox.schema.model_config import ModelConfig from labelbox.schema.annotation_import import ( + LabelImport, MALPredictionImport, MEAPredictionImport, - LabelImport, MEAToMALPredictionImport, ) -from labelbox.schema.dataset import Dataset -from labelbox.schema.data_row import DataRow +from labelbox.schema.asset_attachment import AssetAttachment +from labelbox.schema.batch import Batch +from labelbox.schema.benchmark import Benchmark +from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.catalog import Catalog +from labelbox.schema.data_row import DataRow +from labelbox.schema.data_row_metadata import ( + DataRowMetadata, + DataRowMetadataField, + DataRowMetadataOntology, + DeleteDataRowMetadata, +) +from labelbox.schema.dataset import Dataset from labelbox.schema.enums import AnnotationImportState -from labelbox.schema.label import Label -from labelbox.schema.batch import Batch -from labelbox.schema.review import Review -from labelbox.schema.user import User -from labelbox.schema.organization import Organization -from labelbox.schema.task import Task from labelbox.schema.export_task import ( - StreamType, + BufferedJsonConverterOutput, ExportTask, BufferedJsonConverterOutput, ) +from labelbox.schema.iam_integration import IAMIntegration +from labelbox.schema.identifiable import GlobalKey, UniqueId +from labelbox.schema.identifiables import DataRowIds, GlobalKeys, UniqueIds +from labelbox.schema.invite import Invite, InviteLimit +from labelbox.schema.label import Label +from labelbox.schema.label_score import LabelScore from labelbox.schema.labeling_frontend import ( LabelingFrontend, LabelingFrontendOptions, ) 
-from labelbox.schema.asset_attachment import AssetAttachment -from labelbox.schema.webhook import Webhook +from labelbox.schema.labeling_service import LabelingService +from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard +from labelbox.schema.labeling_service_status import LabelingServiceStatus +from labelbox.schema.media_type import MediaType +from labelbox.schema.model import Model +from labelbox.schema.model_config import ModelConfig +from labelbox.schema.model_run import DataSplit, ModelRun from labelbox.schema.ontology import ( + Classification, + FeatureSchema, Ontology, OntologyBuilder, - Classification, Option, + PromptResponseClassification, + ResponseOption, Tool, - FeatureSchema, ) from labelbox.schema.ontology import PromptResponseClassification from labelbox.schema.ontology import ResponseOption @@ -64,10 +78,20 @@ from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds from labelbox.schema.identifiable import UniqueId, GlobalKey from labelbox.schema.ontology_kind import OntologyKind +from labelbox.schema.organization import Organization +from labelbox.schema.project import Project +from labelbox.schema.project_model_config import ProjectModelConfig from labelbox.schema.project_overview import ( ProjectOverview, ProjectOverviewDetailed, ) -from labelbox.schema.labeling_service import LabelingService -from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -from labelbox.schema.labeling_service_status import LabelingServiceStatus +from labelbox.schema.project_resource_tag import ProjectResourceTag +from labelbox.schema.queue_mode import QueueMode +from labelbox.schema.resource_tag import ResourceTag +from labelbox.schema.review import Review +from labelbox.schema.role import ProjectRole, Role +from labelbox.schema.slice import CatalogSlice, ModelSlice, Slice +from labelbox.schema.task import Task +from labelbox.schema.task_queue import TaskQueue +from labelbox.schema.user import 
User +from labelbox.schema.webhook import Webhook diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index aa08ab0b3..109e3ae55 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -6,6 +6,7 @@ import random import time import urllib.parse +import warnings from collections import defaultdict from datetime import datetime, timezone from types import MappingProxyType @@ -637,6 +638,7 @@ def create_project( } return self._create_project(_CoreProjectInput(**input)) + @overload def create_model_evaluation_project( self, name: str, @@ -649,7 +651,17 @@ def create_model_evaluation_project( is_consensus_enabled: Optional[bool] = None, dataset_id: Optional[str] = None, dataset_name: Optional[str] = None, - data_row_count: int = 100, + data_row_count: Optional[int] = None, + **kwargs, + ) -> Project: + pass + + def create_model_evaluation_project( + self, + dataset_id: Optional[str] = None, + dataset_name: Optional[str] = None, + data_row_count: Optional[int] = None, + **kwargs, ) -> Project: """ Use this method exclusively to create a chat model evaluation project. @@ -674,22 +686,28 @@ def create_model_evaluation_project( >>> client.create_model_evaluation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10) >>> This creates a new project, and adds 100 datarows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created 10 data rows to the project. + >>> client.create_model_evaluation_project(name=project_name) + >>> This creates a new project with no data rows. """ - if not dataset_id and not dataset_name: - raise ValueError( - "dataset_name or data_set_id must be present and not be an empty string." 
- ) + dataset_name_or_id = dataset_id or dataset_name + append_to_existing_dataset = bool(dataset_id) - if dataset_id: - append_to_existing_dataset = True - dataset_name_or_id = dataset_id - else: - append_to_existing_dataset = False - dataset_name_or_id = dataset_name + if dataset_name_or_id: + kwargs["dataset_name_or_id"] = dataset_name_or_id + kwargs["append_to_existing_dataset"] = append_to_existing_dataset + if data_row_count is None: + data_row_count = 100 + if data_row_count < 0: + raise ValueError("data_row_count must be a positive integer.") + kwargs["data_row_count"] = data_row_count + warnings.warn( + "Automatic generation of data rows of live model evaluation projects is deprecated. dataset_name_or_id, append_to_existing_dataset, data_row_count will be removed in a future version.", + DeprecationWarning, + ) - media_type = MediaType.Conversational - editor_task_type = EditorTaskType.ModelChatEvaluation + kwargs["media_type"] = MediaType.Conversational + kwargs["editor_task_type"] = EditorTaskType.ModelChatEvaluation.value input = { "name": name, diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index 51bcce1b2..42d2a1184 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -1,14 +1,10 @@ import logging -from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Callable, Generator, Iterable, Union, Optional -from uuid import uuid4 import warnings +from typing import Callable, Generator, Iterable, Union -from tqdm import tqdm - -from labelbox.schema import ontology from labelbox.orm.model import Entity -from ..ontology import get_classifications, get_tools +from labelbox.schema import ontology + from ..generator import PrefetchGenerator from .label import Label diff --git a/libs/labelbox/src/labelbox/orm/model.py 
b/libs/labelbox/src/labelbox/orm/model.py index 535ab0f7d..a2a0fbd91 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -387,6 +387,7 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] + BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index d6b74de68..232741b5d 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,5 +1,7 @@ import labelbox.schema.asset_attachment import labelbox.schema.annotation_import +import labelbox.schema.asset_attachment +import labelbox.schema.annotation_import import labelbox.schema.benchmark import labelbox.schema.data_row import labelbox.schema.dataset diff --git a/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py new file mode 100644 index 000000000..2e2728daa --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py @@ -0,0 +1,41 @@ +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field + +from labelbox.schema.data_row import DataRowMetadataField + + +class ModelEvalutationTemplateRowData(BaseModel): + type: str = Field( + default="application/vnd.labelbox.conversational.model-chat-evaluation", + frozen=True, + ) + draft: bool = Field(default=True, frozen=True) + rootMessageIds: List[str] = Field(default=[]) + actors: Dict = Field(default={}) + version: int = Field(default=2, frozen=True) + messages: Dict = Field(default={}) + global_key: Optional[str] = None + + +class ModelEvaluationTemplate(BaseModel): + """ + Use this class to create a model 
evaluation data row. + + Examples: + >>> data = ModelEvaluationTemplate() + >>> data.row_data.rootMessageIds = ["root1"] + >>> vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] + >>> data.embeddings = [...] + >>> data.metadata_fields = [...] + >>> data.attachments = [...] + >>> content = data.model_dump() + >>> task = dataset.create_data_rows([content]) + """ + + row_data: ModelEvalutationTemplateRowData = Field( + default=ModelEvalutationTemplateRowData() + ) + attachments: List[Dict] = Field(default=[]) + embeddings: List[Dict] = Field(default=[]) + metadata_fields: List[DataRowMetadataField] = Field(default=[]) diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py index dfc87c8a4..6f8aebc58 100644 --- a/libs/labelbox/src/labelbox/schema/enums.py +++ b/libs/labelbox/src/labelbox/schema/enums.py @@ -1,6 +1,31 @@ from enum import Enum +class BulkImportRequestState(Enum): + """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). + + If you are not using MEA continue using BulkImportRequest. + AnnotationImports are in beta and will change soon. + + .. list-table:: + :widths: 15 150 + :header-rows: 1 + + * - State + - Description + * - RUNNING + - Indicates that the import job is not done yet. + * - FAILED + - Indicates the import job failed. Check `BulkImportRequest.errors` for more information + * - FINISHED + - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information + """ + + RUNNING = "RUNNING" + FAILED = "FAILED" + FINISHED = "FINISHED" + + class AnnotationImportState(Enum): """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). 
diff --git a/libs/labelbox/src/labelbox/schema/export_task.py b/libs/labelbox/src/labelbox/schema/export_task.py index b2e1d054f..7e78fc3e9 100644 --- a/libs/labelbox/src/labelbox/schema/export_task.py +++ b/libs/labelbox/src/labelbox/schema/export_task.py @@ -1,9 +1,14 @@ +import json +import os +import tempfile +import warnings from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum from functools import lru_cache -import json from typing import ( + TYPE_CHECKING, + Any, Callable, Generic, Iterator, @@ -11,17 +16,13 @@ Tuple, TypeVar, Union, - TYPE_CHECKING, - Any, ) import requests -import tempfile -import os +from pydantic import BaseModel from labelbox.schema.task import Task from labelbox.utils import _CamelCaseMixin -from pydantic import BaseModel if TYPE_CHECKING: from labelbox import Client @@ -120,6 +121,7 @@ def _get_file_content( ) response = requests.get(file_info.file, timeout=30) response.raise_for_status() + response.encoding = "utf-8" assert ( len(response.content) == file_info.offsets.end - file_info.offsets.start + 1 diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index ae97089a7..a47e44e25 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -4,17 +4,20 @@ import warnings from collections import namedtuple from datetime import datetime, timezone +from pathlib import Path from string import Template from typing import ( TYPE_CHECKING, Any, Dict, + Iterable, List, Optional, Tuple, Union, get_args, ) +from urllib.parse import urlparse from lbox.exceptions import ( InvalidQueryError, @@ -35,6 +38,7 @@ from labelbox.schema.export_filters import ( ProjectExportFilters, build_filters, + validate_datetime, ) from labelbox.schema.export_params import ProjectExportParams from labelbox.schema.export_task import ExportTask @@ -66,7 +70,7 @@ from labelbox.schema.task_queue import TaskQueue if TYPE_CHECKING: - 
pass + from labelbox import BulkImportRequest DataRowPriority = int @@ -739,7 +743,9 @@ def create_batch( lbox.exceptions.ValueError if a project is not batch mode, if the project is auto data generation, if the batch exceeds 100k data rows """ - if self.is_auto_data_generation(): + if ( + self.is_auto_data_generation() and not self.is_chat_evaluation() + ): # NOTE live chat evaluation projects in sdk do not pre-generate data rows, but use batch as all other projects raise ValueError( "Cannot create batches for auto data generation projects" ) @@ -1307,6 +1313,33 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] + def bulk_import_requests(self) -> PaginatedCollection: + """Returns bulk import request objects which are used in model-assisted labeling. + These are returned with the oldest first, and most recent last. + """ + + id_param = "project_id" + query_str = """query ListAllImportRequestsPyApi($%s: ID!) { + bulkImportRequests ( + where: { projectId: $%s } + skip: %%d + first: %%d + ) { + %s + } + }""" % ( + id_param, + id_param, + query.results_query_part(Entity.BulkImportRequest), + ) + return PaginatedCollection( + self.client, + query_str, + {id_param: str(self.uid)}, + ["bulkImportRequests"], + Entity.BulkImportRequest, + ) + def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1418,6 +1451,77 @@ def _wait_for_task(self, task_id: str) -> Task: return task + def upload_annotations( + self, + name: str, + annotations: Union[str, Path, Iterable[Dict]], + validate: bool = False, + ) -> "BulkImportRequest": # type: ignore + """Uploads annotations to a new Editor project. 
+ + Args: + name (str): name of the BulkImportRequest job + annotations (str or Path or Iterable): + url that is publicly accessible by Labelbox containing an + ndjson file + OR local path to an ndjson file + OR iterable of annotation rows + validate (bool): + Whether or not to validate the payload before uploading. + Returns: + BulkImportRequest + """ + + if isinstance(annotations, str) or isinstance(annotations, Path): + + def _is_url_valid(url: Union[str, Path]) -> bool: + """Verifies that the given string is a valid url. + + Args: + url: string to be checked + Returns: + True if the given url is valid otherwise False + + """ + if isinstance(url, Path): + return False + parsed = urlparse(url) + return bool(parsed.scheme) and bool(parsed.netloc) + + if _is_url_valid(annotations): + return Entity.BulkImportRequest.create_from_url( + client=self.client, + project_id=self.uid, + name=name, + url=str(annotations), + validate=validate, + ) + else: + path = Path(annotations) + if not path.exists(): + raise FileNotFoundError( + f"{annotations} is not a valid url nor existing local file" + ) + return Entity.BulkImportRequest.create_from_local_file( + client=self.client, + project_id=self.uid, + name=name, + file=path, + validate_file=validate, + ) + elif isinstance(annotations, Iterable): + return Entity.BulkImportRequest.create_from_objects( + client=self.client, + project_id=self.uid, + name=name, + predictions=annotations, # type: ignore + validate=validate, + ) + else: + raise ValueError( + f"Invalid annotations given of type: {type(annotations)}" + ) + def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py index a07d52c4d..84908ac28 100644 --- a/libs/labelbox/tests/conftest.py +++ b/libs/labelbox/tests/conftest.py @@ -1055,7 +1055,7 @@ def project_with_one_feature_ontology(project, client: Client): @pytest.fixture def 
configured_project_with_complex_ontology( - client: Client, initial_dataset, rand_gen, image_url, teardown_helpers + client, initial_dataset, rand_gen, image_url, teardown_helpers ): project = client.create_project( name=rand_gen(str), diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py index 2342a759a..001b96771 100644 --- a/libs/labelbox/tests/data/annotation_import/conftest.py +++ b/libs/labelbox/tests/data/annotation_import/conftest.py @@ -2399,6 +2399,10 @@ def expected_export_v2_document(): "height": 65.0, "width": 12.0, }, + "page_dimensions": { + "height": 792.0, + "width": 612.0, + }, }, ], "classifications": [ diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py new file mode 100644 index 000000000..9abae1422 --- /dev/null +++ b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py @@ -0,0 +1,258 @@ +from unittest.mock import patch +import uuid +from labelbox import parser, Project +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +import pytest +import random +from labelbox.data.annotation_types.annotation import ObjectAnnotation +from labelbox.data.annotation_types.classification.classification import ( + Checklist, + ClassificationAnnotation, + ClassificationAnswer, + Radio, +) +from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.rectangle import ( + Rectangle, + RectangleUnit, +) +from labelbox.data.annotation_types.label import Label +from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.ner import ( + DocumentEntity, + DocumentTextSelection, +) +from labelbox.data.annotation_types.video import VideoObjectAnnotation + +from 
labelbox.data.serialization import NDJsonConverter +from labelbox.exceptions import MALValidationError, UuidError +from labelbox.schema.bulk_import_request import BulkImportRequest +from labelbox.schema.enums import BulkImportRequestState +from labelbox.schema.annotation_import import LabelImport, MALPredictionImport +from labelbox.schema.media_type import MediaType + +""" +- Here we only want to check that the uploads are calling the validation +- Then with unit tests we can check the types of errors raised +""" +# TODO: remove library once bulk import requests are removed + + +@pytest.mark.order(1) +def test_create_from_url(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_file(module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + with pytest.raises(MALValidationError): + module_project.upload_annotations( + name=name, annotations=url, validate=True + ) + # Schema ids shouldn't match + + +def test_create_from_objects( + module_project: Project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert 
bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_create_from_label_objects( + module_project, predictions, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + + labels = list(NDJsonConverter.deserialize(predictions)) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=labels + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + normalized_predictions = list(NDJsonConverter.serialize(labels)) + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, normalized_predictions + ) + + +def test_create_from_local_file( + tmp_path, predictions, module_project, annotation_import_test_helpers +): + name = str(uuid.uuid4()) + file_name = f"{name}.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + parser.dump(predictions, f) + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=str(file_path), validate=False + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + annotation_import_test_helpers.assert_file_content( + bulk_import_request.input_file_url, predictions + ) + + +def test_get(client, module_project): + name = str(uuid.uuid4()) + url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + module_project.upload_annotations( + name=name, annotations=url, validate=False + ) + + bulk_import_request = BulkImportRequest.from_name( + 
client, project_id=module_project.uid, name=name + ) + + assert bulk_import_request.project() == module_project + assert bulk_import_request.name == name + assert bulk_import_request.input_file_url == url + assert bulk_import_request.error_file_url is None + assert bulk_import_request.status_file_url is None + assert bulk_import_request.state == BulkImportRequestState.RUNNING + + +def test_validate_ndjson(tmp_path, module_project): + file_name = f"broken.ndjson" + file_path = tmp_path / file_name + with file_path.open("w") as f: + f.write("test") + + with pytest.raises(ValueError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + +def test_validate_ndjson_uuid(tmp_path, module_project, predictions): + file_name = f"repeat_uuid.ndjson" + file_path = tmp_path / file_name + repeat_uuid = predictions.copy() + uid = str(uuid.uuid4()) + repeat_uuid[0]["uuid"] = uid + repeat_uuid[1]["uuid"] = uid + + with file_path.open("w") as f: + parser.dump(repeat_uuid, f) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=str(file_path) + ) + + with pytest.raises(UuidError): + module_project.upload_annotations( + name="name", validate=True, annotations=repeat_uuid + ) + + +@pytest.mark.skip( + "Slow test and uses a deprecated api endpoint for annotation imports" +) +def test_wait_till_done(rectangle_inference, project): + name = str(uuid.uuid4()) + url = project.client.upload_data( + content=parser.dumps(rectangle_inference), sign=True + ) + bulk_import_request = project.upload_annotations( + name=name, annotations=url, validate=False + ) + + assert len(bulk_import_request.inputs) == 1 + bulk_import_request.wait_until_done() + assert bulk_import_request.state == BulkImportRequestState.FINISHED + + # Check that the status files are being returned as expected + assert len(bulk_import_request.errors) == 0 + assert len(bulk_import_request.inputs) == 1 + assert 
bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] + assert len(bulk_import_request.statuses) == 1 + assert bulk_import_request.statuses[0]["status"] == "SUCCESS" + assert ( + bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] + ) + + +def test_project_bulk_import_requests(module_project, predictions): + result = module_project.bulk_import_requests() + assert len(list(result)) == 0 + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + name = str(uuid.uuid4()) + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + + result = module_project.bulk_import_requests() + assert len(list(result)) == 3 + + +def test_delete(module_project, predictions): + name = str(uuid.uuid4()) + + bulk_import_requests = module_project.bulk_import_requests() + [ + bulk_import_request.delete() + for bulk_import_request in bulk_import_requests + ] + + bulk_import_request = module_project.upload_annotations( + name=name, annotations=predictions + ) + bulk_import_request.wait_until_done() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 1 + + bulk_import_request.delete() + all_import_requests = module_project.bulk_import_requests() + assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py index 921e98c9d..1cc5538d9 100644 --- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py +++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py @@ -29,6 +29,78 @@ def 
validate_iso_format(date_string: str): assert parsed_t.second is not None +@pytest.mark.parametrize( + "media_type, data_type_class", + [ + (MediaType.Audio, GenericDataRowData), + (MediaType.Html, GenericDataRowData), + (MediaType.Image, GenericDataRowData), + (MediaType.Text, GenericDataRowData), + (MediaType.Video, GenericDataRowData), + (MediaType.Conversational, GenericDataRowData), + (MediaType.Document, GenericDataRowData), + (MediaType.LLMPromptResponseCreation, GenericDataRowData), + (MediaType.LLMPromptCreation, GenericDataRowData), + (OntologyKind.ResponseCreation, GenericDataRowData), + (OntologyKind.ModelEvaluation, GenericDataRowData), + ], +) +def test_generic_data_row_type_by_data_row_id( + media_type, + data_type_class, + annotations_by_media_type, + hardcoded_datarow_id, +): + annotations_ndjson = annotations_by_media_type[media_type] + annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] + + label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] + + data_label = Label( + data=data_type_class(uid=hardcoded_datarow_id()), + annotations=label.annotations, + ) + + assert data_label.data.uid == label.data.uid + assert label.annotations == data_label.annotations + + +@pytest.mark.parametrize( + "media_type, data_type_class", + [ + (MediaType.Audio, GenericDataRowData), + (MediaType.Html, GenericDataRowData), + (MediaType.Image, GenericDataRowData), + (MediaType.Text, GenericDataRowData), + (MediaType.Video, GenericDataRowData), + (MediaType.Conversational, GenericDataRowData), + (MediaType.Document, GenericDataRowData), + # (MediaType.LLMPromptResponseCreation, GenericDataRowData), + # (MediaType.LLMPromptCreation, GenericDataRowData), + (OntologyKind.ResponseCreation, GenericDataRowData), + (OntologyKind.ModelEvaluation, GenericDataRowData), + ], +) +def test_generic_data_row_type_by_global_key( + media_type, + data_type_class, + annotations_by_media_type, + hardcoded_global_key, +): + annotations_ndjson = 
annotations_by_media_type[media_type] + annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] + + label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] + + data_label = Label( + data=data_type_class(global_key=hardcoded_global_key()), + annotations=label.annotations, + ) + + assert data_label.data.global_key == label.data.global_key + assert label.annotations == data_label.annotations + + @pytest.mark.parametrize( "configured_project, media_type", [ diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json new file mode 100644 index 000000000..4de15e217 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json @@ -0,0 +1,54 @@ +[ + { + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "confidence": 0.8, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" + }, + { + "answer": [ + { + "schemaId": "ckrb1sfl8099e0y919v260awv", + "confidence": 0.82, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } + ], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json 
b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json new file mode 100644 index 000000000..83a95e5bf --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json @@ -0,0 +1,25 @@ +[{ + "location": { + "start": 67, + "end": 128 + }, + "messageId": "some-message-id", + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] +}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 75fe36e44..91563b8ae 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,17 +8,16 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - }, - "classifications": [] + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -29,17 +28,20 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - }, - "classifications": [] + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 
255, + 0, + 0 + ] + } }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -48,39 +50,762 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + 
"x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 
907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 
1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 }, { - "x": 15.0, - "y": 20.0 + "x": 1119, + "y": 934 }, { - "x": 20.0, - "y": 25.0 + "x": 1118, + "y": 935 }, { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json new file mode 100644 index 000000000..591e40cf6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json @@ -0,0 +1,823 @@ +[ + { + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + "schemaId": "ckrazcueb16og0z6609jj7y3y", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + }, + { + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + "schemaId": "ckrazcuec16ok0z66f956apb7", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.834, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "mask": { + "instanceURI": 
"https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } + }, + { + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + "schemaId": "ckrazcuec16oi0z66dzrd8pfl", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.986, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "polygon": [ + { + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 
1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + 
}, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, 
+ "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 + } + ] + }, + { + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + "schemaId": "ckrazcuec16om0z66bhhh4tp7", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "point": { + "x": 2122, + "y": 1457 + } + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 466a03594..82be4cdab 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,86 +1,826 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "ckrazcueb16og0z6609jj7y3y", + "name": "box a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": [], - "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + }, + "confidence": 0.854, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.7 } - ], - "bbox": { - "top": 1352.0, - "left": 2275.0, - "height": 350.0, - "width": 139.0 - } + ] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "ckrazcuec16ok0z66f956apb7", + "name": "mask a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "classifications": 
[], - "confidence": 0.834, + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + }, + "confidence": 0.685, "customMetrics": [ { "name": "customMetric1", - "value": 0.3 + "value": 0.4 + }, + { + "name": "customMetric2", + "value": 0.9 } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" - } + ] }, { - "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "ckrazcuec16oi0z66dzrd8pfl", + "name": "polygon a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.986, + "confidence": 0.71, "customMetrics": [ { "name": "customMetric1", - "value": 0.9 + "value": 0.1 } ], "polygon": [ { - "x": 10.0, - "y": 20.0 + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + 
"y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + 
"x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + }, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 }, { - "x": 15.0, - "y": 20.0 + "x": 1099, + "y": 911 }, { - "x": 20.0, - "y": 25.0 + "x": 1100, + "y": 911 }, { - "x": 10.0, - "y": 20.0 + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 
+ }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, + "y": 935 } ] }, { - "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "ckrazcuec16om0z66bhhh4tp7", + "name": "point a", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, + "confidence": 0.77, + "customMetrics": [ + { + "name": "customMetric2", + "value": 1.2 + } + ], "point": { - "x": 2122.0, - "y": 1457.0 + "x": 2122, + "y": 1457 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json new file mode 100644 index 000000000..31be5a4c7 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json @@ -0,0 +1,10 @@ +[ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "aggregation": "ARITHMETIC_MEAN", + "dataRow": { + 
"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "metricValue": 0.1 + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json new file mode 100644 index 000000000..f4b4894f6 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json @@ -0,0 +1,155 @@ +[{ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 4, + "unit": "POINTS", + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 162.73, + "left": 32.45, + "height": 388.16999999999996, + "width": 101.66000000000001 + } +}, { + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "bbox": { + "top": 223.26, + "left": 251.42, + "height": 457.03999999999996, + "width": 186.78 + } +}, { + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 6, + "unit": "POINTS", + "confidence": 0.99, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 32.52, + "left": 218.17, + "height": 231.73, + "width": 110.56000000000003 + } +}, { + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + 
"unit": "POINTS", + "confidence": 0.89, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ], + "bbox": { + "top": 117.39, + "left": 4.25, + "height": 456.9200000000001, + "width": 164.83 + } +}, { + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 8, + "unit": "POINTS", + "bbox": { + "top": 82.13, + "left": 217.28, + "height": 279.76, + "width": 82.43000000000004 + } +}, { + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 3, + "unit": "POINTS", + "bbox": { + "top": 298.12, + "left": 83.34, + "height": 203.83000000000004, + "width": 0.37999999999999545 + } +}, +{ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "named_entity", + "classifications": [], + "textSelections": [ + { + "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + "tokenIds": [ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c" + ], + "page": 1 + } + ] +} +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json new file mode 100644 index 000000000..d6a9eecbd --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json @@ -0,0 +1,36 @@ +[ + { + "line": [ + { + "x": 2534.353, 
+ "y": 249.471 + }, + { + "x": 2429.492, + "y": 182.092 + }, + { + "x": 2294.322, + "y": 221.962 + } + ], + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-line", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.58, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json new file mode 100644 index 000000000..1f26d8dc8 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json @@ -0,0 +1,26 @@ +[ + { + "location": { + "start": 67, + "end": 128 + }, + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53, + "customMetrics": [ + { + "name": "customMetric1", + "value": 0.5 + }, + { + "name": "customMetric2", + "value": 0.3 + } + ] + } +] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json new file mode 100644 index 000000000..11e0753d9 --- /dev/null +++ b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json @@ -0,0 +1,166 @@ +[{ + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" + }, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{ + "start": 30, + "end": 35 + }, { + "start": 50, + "end": 51 + }] +}, { + "answer": [{ + "schemaId": "ckrb1sfl8099e0y919v260awv" + }], + "schemaId": 
"ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{ + "start": 0, + "end": 5 + }] +}, { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" +}, { + "classifications": [], + "schemaId": + "cl5islwg200gfci6g0oitaypu", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [{ + "keyframes": [{ + "frame": 1, + "line": [{ + "x": 10.0, + "y": 10.0 + }, { + "x": 100.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }, { + "frame": 5, + "line": [{ + "x": 15.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 8, + "line": [{ + "x": 100.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [{ + "keyframes": [{ + "frame": 1, + "point": { + "x": 10.0, + "y": 10.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 5, + "point": { + "x": 50.0, + "y": 50.0 + }, + "classifications": [] + }, { + "frame": 10, + "point": { + "x": 10.0, + "y": 50.0 + }, + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5iw0roz00lwci6g5jni62vs", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [{ + "keyframes": [{ + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0 + }, + 
"classifications": [] + }, { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0 + }, + "classifications": [] + }] + }] +}] diff --git a/libs/labelbox/tests/data/export/conftest.py b/libs/labelbox/tests/data/export/conftest.py index b1b81230e..4a59b6966 100644 --- a/libs/labelbox/tests/data/export/conftest.py +++ b/libs/labelbox/tests/data/export/conftest.py @@ -1,8 +1,9 @@ import time -from labelbox import MediaType, Client import uuid + import pytest +from labelbox import Client, MediaType from labelbox.schema.annotation_import import AnnotationImportState, LabelImport from labelbox.schema.labeling_frontend import LabelingFrontend from labelbox.schema.media_type import MediaType diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index fb78916f4..c9e9bcdb9 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -36,6 +36,13 @@ def test_serialization_min(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + for i, annotation in enumerate(res.annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_serialization_with_classification(): label = Label( @@ -125,6 +132,12 @@ def test_serialization_with_classification(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + assert label.model_dump(exclude_none=True) == label.model_dump( + exclude_none=True + ) + def test_serialization_with_classification_double_nested(): label = Label( @@ -217,6 +230,13 @@ def 
test_serialization_with_classification_double_nested(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert label.model_dump(exclude_none=True) == label.model_dump( + exclude_none=True + ) + def test_serialization_with_classification_double_nested_2(): label = Label( @@ -306,3 +326,9 @@ def test_serialization_with_classification_double_nested_2(): res = next(serialized) res.pop("uuid") assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + assert label.model_dump(exclude_none=True) == label.model_dump( + exclude_none=True + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py index 82adce99c..8dcb17f0b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py @@ -1,73 +1,15 @@ import json -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - Radio, - Text, -) -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ClassificationAnnotation, - ClassificationAnswer, -) -from labelbox.data.mixins import CustomMetric - def test_classification(): with open( "tests/data/assets/ndjson/classification_import.json", "r" ) as file: data = json.load(file) - - label = Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - 
CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.8, - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ), - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.82, - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - ), - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"}, - value=Text(answer="a value"), - ), - ], - ) - - res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -76,48 +18,6 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - label = Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - name="classification a", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="choice 1", - ), - ), - ), - ClassificationAnnotation( - name="classification b", - extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.945, - name="choice 2", - ) - ], - ), - ), - ClassificationAnnotation( - name="classification c", - extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, - value=Text(answer="a value"), - ), - ], - ) - - res = 
list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 5aa7285e2..3269d9c96 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,12 +1,8 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -103,62 +99,25 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - -def test_conversation_entity_import(): - with open( - "tests/data/assets/ndjson/conversation_entity_import.json", "r" - ) as file: - data = json.load(file) - - label = lb_types.Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - lb_types.ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, - value=lb_types.ConversationEntity( - start=67, end=128, message_id="some-message-id" - ), - ) - ], - ) - - res = list(NDJsonConverter.serialize([label])) - assert res == data + deserialized_label = list(NDJsonConverter().deserialize(ndjson)) + deserialized_label[0].annotations[0].extra.pop("uuid") + assert deserialized_label[0].model_dump(exclude_none=True) == label[ + 0 + ].model_dump(exclude_none=True) -def test_conversation_entity_import_without_confidence(): - with open( 
+@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/conversation_entity_import.json", "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_conversation_entity_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - label = lb_types.Label( - uid=None, - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - lb_types.ObjectAnnotation( - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, - value=lb_types.ConversationEntity( - start=67, end=128, extra={}, message_id="some-message-id" - ), - ) - ], - ) - - res = list(NDJsonConverter.serialize([label])) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py index 999e1bda5..333c00250 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py @@ -1,29 +1,67 @@ +from copy import copy +import pytest import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter +from labelbox.data.serialization.ndjson.objects import ( + NDDicomSegments, + NDDicomSegment, + NDDicomLine, +) + +""" +Data gen prompt test data +""" + +prompt_text_annotation = lb_types.PromptClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + name="test", + value=lb_types.PromptText( + answer="the answer to the text questions right here" + ), +) + +prompt_text_ndjson = { + "answer": "the answer to the text questions right here", + "name": "test", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, +} + +data_gen_label = lb_types.Label( + data={"uid": 
"ckrb1sf1i1g7i0ybcdc6oc8ct"}, + annotations=[prompt_text_annotation], +) + +""" +Prompt annotation test +""" def test_serialize_label(): - prompt_text_annotation = lb_types.PromptClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - name="test", - extra={"uuid": "test"}, - value=lb_types.PromptText( - answer="the answer to the text questions right here" - ), - ) + serialized_label = next(NDJsonConverter().serialize([data_gen_label])) + # Remove uuid field since this is a random value that can not be specified also meant for relationships + del serialized_label["uuid"] + assert serialized_label == prompt_text_ndjson + - prompt_text_ndjson = { - "answer": "the answer to the text questions right here", - "name": "test", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "test", - } - - data_gen_label = lb_types.Label( - data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - annotations=[prompt_text_annotation], +def test_deserialize_label(): + deserialized_label = next( + NDJsonConverter().deserialize([prompt_text_ndjson]) ) - serialized_label = next(NDJsonConverter().serialize([data_gen_label])) + if hasattr(deserialized_label.annotations[0], "extra"): + # Extra fields are added to deserialized label by default need removed to match + deserialized_label.annotations[0].extra = {} + assert deserialized_label.model_dump( + exclude_none=True + ) == data_gen_label.model_dump(exclude_none=True) - assert serialized_label == prompt_text_ndjson + +def test_serialize_deserialize_label(): + serialized = list(NDJsonConverter.serialize([data_gen_label])) + deserialized = next(NDJsonConverter.deserialize(serialized)) + if hasattr(deserialized.annotations[0], "extra"): + # Extra fields are added to deserialized label by default need removed to match + deserialized.annotations[0].extra = {} + assert deserialized.model_dump( + exclude_none=True + ) == data_gen_label.model_dump(exclude_none=True) diff --git 
a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 6a00fa871..4ea0586c7 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,5 +1,6 @@ from copy import copy import pytest +import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -182,3 +183,28 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson + + +@pytest.mark.parametrize("label, ndjson", labels_ndjsons) +def test_deserialize_label(label, ndjson): + deserialized_label = next(NDJsonConverter().deserialize([ndjson])) + if hasattr(deserialized_label.annotations[0], "extra"): + deserialized_label.annotations[0].extra = {} + for i, annotation in enumerate(deserialized_label.annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value + + +@pytest.mark.parametrize("label", labels) +def test_serialize_deserialize_label(label): + serialized = list(NDJsonConverter.serialize([label])) + deserialized = list(NDJsonConverter.deserialize(serialized)) + if hasattr(deserialized[0].annotations[0], "extra"): + deserialized[0].annotations[0].extra = {} + for i, annotation in enumerate(deserialized[0].annotations): + if hasattr(annotation, "frames"): + assert annotation.frames == label.annotations[i].frames + if hasattr(annotation, "value"): + assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index fcdf4368b..b00182275 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ 
b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,19 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - RectangleUnit, - Point, - DocumentRectangle, - DocumentEntity, - DocumentTextSelection, -) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -66,144 +53,10 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - labels = [ - Label( - uid=None, - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), 
- ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.89, - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - ) - ] - ), - ) - ], - ), - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for 
x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_with_name_only(): @@ -212,135 +65,26 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.99, - name="boxy", - feature_schema_id=None, - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=DocumentRectangle( - start=Point(x=32.45, y=162.73), - end=Point(x=134.11, y=550.9), - page=4, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - }, - value=DocumentRectangle( - start=Point(x=251.42, y=223.26), - end=Point(x=438.2, y=680.3), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - }, - value=DocumentRectangle( - start=Point(x=218.17, y=32.52), - end=Point(x=328.73, y=264.25), - page=6, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.74, - name="boxy", - extra={ - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - }, - value=DocumentRectangle( - start=Point(x=4.25, y=117.39), - end=Point(x=169.08, y=574.3100000000001), - page=7, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - }, - value=DocumentRectangle( - start=Point(x=217.28, y=82.13), - end=Point(x=299.71000000000004, y=361.89), - page=8, - unit=RectangleUnit.POINTS, - ), - ), - ObjectAnnotation( - name="boxy", - extra={ - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - }, - 
value=DocumentRectangle( - start=Point(x=83.34, y=298.12), - end=Point(x=83.72, y=501.95000000000005), - page=3, - unit=RectangleUnit.POINTS, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="named_entity", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=DocumentEntity( - text_selections=[ - DocumentTextSelection( - token_ids=[ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c", - ], - group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - page=1, - ) - ] - ), - ) - ], - ), - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson + + +def test_pdf_bbox_deserialize(): + deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) + deserialized[0].annotations[0].extra = {} + assert ( + deserialized[0].annotations[0].value + == bbox_labels[0].annotations[0].value + ) + assert ( + deserialized[0].annotations[0].name + == bbox_labels[0].annotations[0].name + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index a0cd13e81..d6efab3ee 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,6 +9,8 @@ def video_bbox_label(): 
uid="cl1z52xwh00050fhcmfgczqvn", data=GenericDataRowData( uid="cklr9mr4m5iao0rb6cvxu4qbn", + file_path=None, + frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -20,7 +22,6 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", - "uuid": "b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, value=Rectangle( extra={}, @@ -587,4 +588,31 @@ def test_serialize_video_objects(): serialized_labels = NDJsonConverter.serialize([label]) label = next(serialized_labels) - assert label == video_serialized_bbox_label() + manual_label = video_serialized_bbox_label() + + for key in label.keys(): + # ignore uuid because we randomize if there was none + if key != "uuid": + assert label[key] == manual_label[key] + + assert len(label["segments"]) == 2 + assert len(label["segments"][0]["keyframes"]) == 2 + assert len(label["segments"][1]["keyframes"]) == 4 + + # #converts back only the keyframes. 
should be the sum of all prev segments + deserialized_labels = NDJsonConverter.deserialize([label]) + label = next(deserialized_labels) + assert len(label.annotations) == 6 + + +def test_confidence_is_ignored(): + label = video_bbox_label() + serialized_labels = NDJsonConverter.serialize([label]) + label = next(serialized_labels) + label["confidence"] = 0.453 + label["segments"][0]["confidence"] = 0.453 + + deserialized_labels = NDJsonConverter.deserialize([label]) + label = next(deserialized_labels) + for annotation in label.annotations: + assert annotation.confidence is None diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 7b03a8447..7daf17188 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -34,6 +34,16 @@ def test_serialization(): assert res["answer"] == "text_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + + annotation = res.annotations[0] + + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "text_answer" + assert annotation_value.confidence == 0.5 + def test_nested_serialization(): label = Label( @@ -92,3 +102,19 @@ def test_nested_serialization(): assert sub_classification["name"] == "nested answer" assert sub_classification["answer"] == "nested answer" assert sub_classification["confidence"] == 0.7 + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + answer = annotation.value.answer[0] + assert answer.confidence == 0.9 + assert answer.name == "first_answer" + + classification_answer = answer.classifications[0].value.answer + assert classification_answer.confidence == 0.8 + assert classification_answer.name == "first_sub_radio_answer" + + 
sub_classification_answer = classification_answer.classifications[0].value + assert type(sub_classification_answer) is Text + assert sub_classification_answer.answer == "nested answer" + assert sub_classification_answer.confidence == 0.7 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py index d104a691e..2b3fa7f8c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py @@ -1,74 +1,73 @@ -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +import json +import pytest + +from labelbox.data.serialization.ndjson.classification import NDRadio + from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ClassificationAnnotation, - Radio, - ClassificationAnswer, -) +from labelbox.data.serialization.ndjson.objects import NDLine -def test_generic_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def round_dict(data): + if isinstance(data, dict): + for key in data: + if isinstance(data[key], float): + data[key] = int(data[key]) + elif isinstance(data[key], dict): + data[key] = round_dict(data[key]) + elif isinstance(data[key], (list, tuple)): + data[key] = [round_dict(r) for r in data[key]] - label = Label( - data=GenericDataRowData( - global_key="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) + return data + + 
+@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/classification_import_global_key.json", + "tests/data/assets/ndjson/metric_import_global_key.json", + "tests/data/assets/ndjson/polyline_import_global_key.json", + "tests/data/assets/ndjson/text_entity_import_global_key.json", + "tests/data/assets/ndjson/conversation_entity_import_global_key.json", + ], +) +def test_many_types(filename: str): + with open(filename, "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == data + f.close() - res = list(NDJsonConverter.serialize([label])) - assert res == expected +def test_image(): + with open( + "tests/data/assets/ndjson/image_import_global_key.json", "r" + ) as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() -def test_dict_data_row_global_key_included(): - expected = [ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - } - ] +def test_pdf(): + with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() - label = Label( - data={ - "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct", - }, - annotations=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ) - res = list(NDJsonConverter.serialize([label])) +def 
test_video(): + with open( + "tests/data/assets/ndjson/video_import_global_key.json", "r" + ) as f: + data = json.load(f) - assert res == expected + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] + f.close() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index 4d615658c..94198999f 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,8 +1,4 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -13,7 +9,6 @@ ObjectAnnotation, MaskData, ) -from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -33,74 +28,12 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 
0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.9) - ], - confidence=0.986, - feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_image_with_name_only(): @@ -109,74 +42,11 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.851, - name="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Rectangle( - start=Point(extra={}, x=2275.0, y=1352.0), - end=Point(extra={}, x=2414.0, y=1702.0), - ), - ), - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.3) - ], - confidence=0.834, - name="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=[255, 0, 0], - ), - ), - ObjectAnnotation( - custom_metrics=[ - 
CustomMetric(name="customMetric1", value=0.9) - ], - confidence=0.986, - name="ckrazcuec16oi0z66dzrd8pfl", - extra={ - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - }, - value=Polygon( - points=[ - Point(x=10.0, y=20.0), - Point(x=15.0, y=20.0), - Point(x=20.0, y=25.0), - Point(x=10.0, y=20.0), - ], - ), - ), - ObjectAnnotation( - name="ckrazcuec16om0z66bhhh4tp7", - extra={ - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - }, - value=Point(x=2122.0, y=1457.0), - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - del res[1]["mask"]["colorRGB"] # JSON does not support tuples - assert res == data + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask(): @@ -186,11 +56,10 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], - "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -198,54 +67,16 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": (255, 0, 0), + "colorRGB": [255, 0, 0], }, - "classifications": [], }, ] + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop("classifications", None) - mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 
1, 1], [1, 1, 1]]]) - mask_numpy = mask_numpy.astype(np.uint8) - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrazctum0z8a0ybc0b0o0g0v", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.4) - ], - confidence=0.8, - feature_schema_id="ckrazcueb16og0z6609jj7y3y", - extra={ - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - }, - value=Mask( - mask=MaskData(arr=mask_numpy), - color=(1, 1, 1), - ), - ), - ObjectAnnotation( - feature_schema_id="ckrazcuec16ok0z66f956apb7", - extra={ - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - }, - value=Mask( - extra={}, - mask=MaskData( - url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - ), - color=(255, 0, 0), - ), - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] def test_mask_from_arr(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 40e098405..45c5c67bf 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,166 +1,38 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.annotation_types.metrics.confusion_matrix import ( - ConfusionMatrixMetric, -) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ScalarMetric, - ScalarMetricAggregation, - ConfusionMatrixAggregation, -) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - 
ScalarMetric( - value=0.1, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) - assert res == data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert reserialized == data def test_custom_scalar_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": 0.1, - "metricName": "custom_iou", - "featureName": "sample_class", - "aggregation": "SUM", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: 0.1, 0.2: 0.5}, - "metricName": "custom_iou", - "aggregation": "SUM", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ScalarMetric( - value=0.1, - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value=0.1, - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ScalarMetric( - value={"0.1": 0.1, "0.2": 0.5}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="custom_iou", - aggregation=ScalarMetricAggregation.SUM, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_scalar_import.json", "r" + ) as file: + data = json.load(file) - assert res 
== data + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) def test_custom_confusion_matrix_metric(): - data = [ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (1, 1, 2, 3), - "metricName": "50%_iou", - "featureName": "sample_class", - "subclassName": "sample_subclass", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": (0, 1, 2, 5), - "metricName": "50%_iou", - "featureName": "sample_class", - "aggregation": "CONFUSION_MATRIX", - }, - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", - "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, - "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - "metricName": "50%_iou", - "aggregation": "CONFUSION_MATRIX", - }, - ] - - labels = [ - Label( - data=GenericDataRowData( - uid="ckrmdnqj4000007msh9p2a27r", - ), - annotations=[ - ConfusionMatrixMetric( - value=(1, 1, 2, 3), - feature_name="sample_class", - subclass_name="sample_subclass", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value=(0, 1, 2, 5), - feature_name="sample_class", - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ConfusionMatrixMetric( - value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, - extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, - metric_name="50%_iou", - aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, - ), - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + with open( + "tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" + ) as file: + data = 
json.load(file) - assert data == res + label_list = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(label_list)) + assert json.dumps(reserialized, sort_keys=True) == json.dumps( + data, sort_keys=True + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 202f793fe..69594ff73 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,125 +1,32 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest from labelbox.data.serialization import NDJsonConverter -from labelbox.types import ( - Label, - MessageEvaluationTaskAnnotation, - MessageSingleSelectionTask, - MessageMultiSelectionTask, - MessageInfo, - OrderedMessageInfo, - MessageRankingTask, -) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cnjencjencjfencvj", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="single-selection", - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, - value=MessageSingleSelectionTask( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - parent_message_id="clxfznjb800073b6v43ppx9ca", - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cfcerfvergerfefj", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="multi-selection", - extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, - value=MessageMultiSelectionTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - selected_messages=[ - MessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 5", - ) - ], - ), - ) - ], - ), - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - 
MessageEvaluationTaskAnnotation( - name="ranking", - extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=2, - ), - ], - ), - ) - ], - ), - ] + deserialized = list(NDJsonConverter.deserialize(data)) + reserialized = list(NDJsonConverter.serialize(deserialized)) - res = list(NDJsonConverter.serialize(labels)) - - assert res == data + assert data == reserialized def test_mesage_ranking_task_wrong_order_serialization(): + with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: + data = json.load(file) + + some_ranking_task = next( + task + for task in data + if task["messageEvaluationTask"]["format"] == "message-ranking" + ) + some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ + "order" + ] = 3 + with pytest.raises(ValueError): - ( - Label( - data=GenericDataRowData( - uid="cwefgtrgrthveferfferffr", - ), - annotations=[ - MessageEvaluationTaskAnnotation( - name="ranking", - extra={ - "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" - }, - value=MessageRankingTask( - parent_message_id="clxfznjb800073b6v43ppx9ca", - ranked_messages=[ - OrderedMessageInfo( - message_id="clxfzocbm00093b6vx4ndisub", - model_config_name="GPT 5", - order=1, - ), - OrderedMessageInfo( - message_id="clxfzocbm00083b6v8vczsept", - model_config_name="GPT 4 with temperature 0.7", - order=1, - ), - ], - ), - ) - ], - ), - ) + list(NDJsonConverter.deserialize([some_ranking_task])) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py new file mode 100644 index 000000000..790bd87b3 --- /dev/null +++ 
b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py @@ -0,0 +1,19 @@ +import json +from labelbox.data.serialization.ndjson.label import NDLabel +from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle +import pytest + + +def test_bad_annotation_input(): + data = [{"test": 3}] + with pytest.raises(ValueError): + NDLabel(**{"annotations": data}) + + +def test_correct_annotation_input(): + with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: + data = json.load(f) + assert isinstance( + NDLabel(**{"annotations": [data[0]]}).annotations[0], + NDDocumentRectangle, + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index 3633c9cbe..e0f0df0e6 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,135 +1,13 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Rectangle, - Point, - ClassificationAnnotation, - Radio, - ClassificationAnswer, - Text, - Checklist, -) def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - 
name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.34, - feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Radio( - answer=ClassificationAnswer( - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ), - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "5d03213e-4408-456c-9eca-cf0723202961", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - value=Checklist( - answer=[ - ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.894, - feature_schema_id="ckrb1sfl8099e0y919v260awv", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - feature_schema_id="ckrb1sfjx099a0y914hl319ie", - extra={ - "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - extra={}, - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -138,112 +16,6 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - 
uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="box a", - extra={ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - value=Rectangle( - start=Point(x=2275.0, y=1352.0), - end=Point(x=2414.0, y=1702.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification a", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.811, - name="first answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box b", - extra={ - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification b", - value=Radio( - answer=ClassificationAnswer( - custom_metrics=[ - CustomMetric( - name="customMetric1", value=0.5 - ), - CustomMetric( - name="customMetric2", value=0.3 - ), - ], - confidence=0.815, - name="second answer", - ), - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="classification c", - value=Checklist( - answer=[ - ClassificationAnswer( - name="third answer", - ) - ], - ), - ) - ], - ), - ObjectAnnotation( - name="box c", - extra={ - "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", - }, - value=Rectangle( - start=Point(x=2089.0, y=1251.0), - end=Point(x=2247.0, y=1679.0), - ), - classifications=[ - ClassificationAnnotation( - name="a string", - value=Text( - answer="a string", - ), - ) - ], - ), - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git 
a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index cd11d97fe..97d48a14e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,76 +1,18 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ObjectAnnotation, Point, Line, Label - - -def test_polyline_import_with_confidence(): - with open( - "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" - ) as file: - data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-line", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_polyline_import_without_confidence(): - with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/polyline_without_confidence_import.json", + "tests/data/assets/ndjson/polyline_import.json", + ], +) +def test_polyline_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.58, - name="some-line", - 
feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=Line( - points=[ - Point(x=2534.353, y=249.471), - Point(x=2429.492, y=182.092), - Point(x=2294.322, y=221.962), - ], - ), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index ec57f0528..2b2ade5d6 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,3 +1,4 @@ +import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -38,6 +39,14 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + + for i, annotation in enumerate(res.annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_serialization_with_radio_classification(): label = Label( @@ -90,3 +99,10 @@ def test_serialization_with_radio_classification(): res = next(serialized) res.pop("uuid") assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.annotations[0].model_dump( + exclude_none=True + ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 0e42ab152..66630dbb5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ 
b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,10 +1,6 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -12,26 +8,8 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="ckrb1sf1i1g7i0ybcdc6oc8ct", - ), - annotations=[ - ObjectAnnotation( - name="bbox", - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - }, - value=Rectangle( - start=Point(x=38.0, y=28.0), - end=Point(x=81.0, y=69.0), - ), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data @@ -61,6 +39,8 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, }, ) @@ -68,9 +48,8 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - data = list(NDJsonConverter.serialize([label])) - - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def test_rectangle_mixed_start_end_points(): @@ -97,13 +76,17 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + "page": None, + "unit": None, + }, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - data = list(NDJsonConverter.serialize([label])) - assert res == data + res = list(NDJsonConverter.deserialize(res)) + assert res == [label] def 
test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index 235b66957..f33719035 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,135 +1,16 @@ import json +from uuid import uuid4 -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import ( - Label, - ObjectAnnotation, - Point, - Rectangle, - RelationshipAnnotation, - Relationship, -) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = [ - Label( - data=GenericDataRowData( - uid="clf98gj90000qp38ka34yhptl", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - extra={}, - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - Label( - data=GenericDataRowData( - 
uid="clf98gj90000qp38ka34yhptl-DIFFERENT", - ), - annotations=[ - ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - RelationshipAnnotation( - name="is chasing", - extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, - value=Relationship( - source=ObjectAnnotation( - name="dog", - extra={ - "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", - }, - value=Rectangle( - start=Point(x=400.0, y=500.0), - end=Point(x=600.0, y=700.0), - ), - ), - target=ObjectAnnotation( - name="cat", - extra={ - "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", - }, - value=Rectangle( - start=Point(x=100.0, y=200.0), - end=Point(x=200.0, y=300.0), - ), - ), - type=Relationship.Type.UNIDIRECTIONAL, - ), - ), - ], - ), - ] + res = list(NDJsonConverter.deserialize(data)) res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -163,3 +44,29 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] + + +def test_relationship_nonexistent_object(): + with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: + data = json.load(file) + + relationship_annotation = data[2] + source_uuid = relationship_annotation["relationship"]["source"] + target_uuid = str(uuid4()) + relationship_annotation["relationship"]["target"] = target_uuid + error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" + + with pytest.raises(ValueError, match=error_msg): + list(NDJsonConverter.deserialize(data)) + + +def test_relationship_duplicate_uuids(): + with open("tests/data/assets/ndjson/relationship_import.json", 
"r") as file: + data = json.load(file) + + source, target = data[0], data[1] + target["uuid"] = source["uuid"] + error_msg = f"UUID '{source['uuid']}' is not unique" + + with pytest.raises(AssertionError, match=error_msg): + list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 28eba07bd..83ac0da68 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,5 +1,7 @@ from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( + ClassificationAnswer, + Radio, Text, ) from labelbox.data.annotation_types.data import GenericDataRowData @@ -31,3 +33,11 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index fb93f15d4..3e856f001 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,68 +1,21 @@ import json -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.mixins import CustomMetric +import pytest from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.types import Label, ObjectAnnotation, TextEntity - - -def test_text_entity_import(): - with 
open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: - data = json.load(file) - - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - custom_metrics=[ - CustomMetric(name="customMetric1", value=0.5), - CustomMetric(name="customMetric2", value=0.3), - ], - confidence=0.53, - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - res = list(NDJsonConverter.serialize(labels)) - assert res == data -def test_text_entity_import_without_confidence(): - with open( +@pytest.mark.parametrize( + "filename", + [ + "tests/data/assets/ndjson/text_entity_import.json", "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - "r", - ) as file: + ], +) +def test_text_entity_import(filename: str): + with open(filename, "r") as file: data = json.load(file) - labels = [ - Label( - data=GenericDataRowData( - uid="cl6xnv9h61fv0085yhtoq06ht", - ), - annotations=[ - ObjectAnnotation( - name="some-text-entity", - feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", - extra={ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - }, - value=TextEntity(start=67, end=128, extra={}), - ) - ], - ) - ] - - res = list(NDJsonConverter.serialize(labels)) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 6c14343a4..b0e277d9d 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,24 +1,22 @@ import json +from operator import itemgetter + from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, 
Radio, - Text, ) from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.geometry.line import Line from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.geometry.rectangle import Rectangle - from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.video import ( VideoClassificationAnnotation, VideoObjectAnnotation, ) - from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from operator import itemgetter def test_video(): @@ -293,7 +291,10 @@ def test_video(): data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - assert data == res + + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_name_only(): @@ -568,7 +569,9 @@ def test_video_name_only(): data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - assert data == res + pairs = zip(data, res) + for data, res in pairs: + assert data == res def test_video_classification_global_subclassifications(): @@ -586,6 +589,7 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( + name="checklist", answer=[ ClassificationAnswer( name="first_checklist_answer", @@ -612,7 +616,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = { + expected_second_annotation = nested_checklist_annotation_ndjson = { "name": "nested_checklist_question", "answer": [ { @@ -634,6 +638,12 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + assert annotation.name == 
label.annotations[i].name + def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -799,6 +809,14 @@ def test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + assert annotation.name == label.annotations[i].name + def test_video_classification_point(): bbox_annotation = [ @@ -949,6 +967,13 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value + def test_video_classification_frameline(): bbox_annotation = [ @@ -1116,289 +1141,9 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - -[ - { - "answer": "a value", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "frames": [{"end": 5, "start": 0}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5islwg200gfci6g0oitaypu", - "segments": [ - { - "keyframes": [ - { - "classifications": 
[], - "frame": 1, - "line": [ - {"x": 10.0, "y": 10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - { - "classifications": [], - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - }, - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - } - ] - }, - ], - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "segments": [ - { - "keyframes": [ - { - "classifications": [], - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - } - ] - }, - { - "keyframes": [ - { - "classifications": [], - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - }, - { - "classifications": [], - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - }, - ] - }, - ], - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - }, - { - "classifications": [], - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "segments": [ - { - "keyframes": [ - { - "bbox": { - "height": 100.0, - "left": 5.0, - "top": 10.0, - "width": 150.0, - }, - "classifications": [], - "frame": 1, - }, - { - "bbox": { - "height": 50.0, - "left": 5.0, - "top": 30.0, - "width": 150.0, - }, - "classifications": [], - "frame": 5, - }, - ] - }, - { - "keyframes": [ - { - "bbox": { - "height": 400.0, - "left": 200.0, - "top": 300.0, - "width": 150.0, - }, - "classifications": [], - "frame": 10, - } - ] - }, - ], - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - }, -] - -[ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], - }, - { - "answer": [{"schemaId": 
"ckrb1sfl8099e0y919v260awv"}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{"start": 0, "end": 5}], - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", - }, - { - "classifications": [], - "schemaId": "cl5islwg200gfci6g0oitaypu", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "line": [ - {"x": 10.0, "y": 10.0}, - {"x": 100.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - { - "frame": 5, - "line": [ - {"x": 15.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 8, - "line": [ - {"x": 100.0, "y": 10.0}, - {"x": 50.0, "y": 100.0}, - {"x": 50.0, "y": 30.0}, - ], - "classifications": [], - } - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "point": {"x": 10.0, "y": 10.0}, - "classifications": [], - } - ] - }, - { - "keyframes": [ - { - "frame": 5, - "point": {"x": 50.0, "y": 50.0}, - "classifications": [], - }, - { - "frame": 10, - "point": {"x": 10.0, "y": 50.0}, - "classifications": [], - }, - ] - }, - ], - }, - { - "classifications": [], - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0, - }, - "classifications": [], - }, - { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - 
"height": 50.0, - "width": 150.0, - }, - "classifications": [], - }, - ] - }, - { - "keyframes": [ - { - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0, - }, - "classifications": [], - } - ] - }, - ], - }, -] + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for i, annotation in enumerate(annotations): + annotation.extra.pop("uuid") + assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index 88670811e..eb23e4ad4 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -11,7 +11,6 @@ from typing import List, Tuple, Type import pytest -import requests from labelbox import ( Classification, @@ -113,7 +112,7 @@ def configured_project( @pytest.fixture def configured_project_with_complex_ontology( - client: Client, initial_dataset, rand_gen, image_url, teardown_helpers + client, initial_dataset, rand_gen, image_url, teardown_helpers ): project = client.create_project( name=rand_gen(str), @@ -632,11 +631,28 @@ def chat_evaluation_ontology(client, rand_gen): @pytest.fixture -def live_chat_evaluation_project_with_new_dataset(client, rand_gen): +def live_chat_evaluation_project(client, rand_gen): project_name = f"test-model-evaluation-project-{rand_gen(str)}" - dataset_name = f"test-model-evaluation-dataset-{rand_gen(str)}" - project = client.create_model_evaluation_project( - name=project_name, dataset_name=dataset_name, data_row_count=1 + project = client.create_model_evaluation_project(name=project_name) + + yield project + + project.delete() + + +@pytest.fixture +def live_chat_evaluation_project_with_batch( + client, + rand_gen, + live_chat_evaluation_project, + offline_conversational_data_row, +): + project_name = f"test-model-evaluation-project-{rand_gen(str)}" + project = 
client.create_model_evaluation_project(name=project_name) + + project.create_batch( + rand_gen(str), + [offline_conversational_data_row.uid], # sample of data row objects ) yield project @@ -832,3 +848,21 @@ def print_perf_summary(): for aaa in islice(sorted_dict, num_of_entries) ] print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) + + +@pytest.fixture +def make_metadata_fields(constants): + msg = "A message" + time = datetime.now(timezone.utc) + + fields = [ + DataRowMetadataField( + schema_id=constants["SPLIT_SCHEMA_ID"], + value=constants["TEST_SPLIT_ID"], + ), + DataRowMetadataField( + schema_id=constants["CAPTURE_DT_SCHEMA_ID"], value=time + ), + DataRowMetadataField(schema_id=constants["TEXT_SCHEMA_ID"], value=msg), + ] + return fields diff --git a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py index 3e462d677..2c02b77ac 100644 --- a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py +++ b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py @@ -1,5 +1,3 @@ -from unittest.mock import patch - import pytest from labelbox import MediaType @@ -7,9 +5,8 @@ def test_create_chat_evaluation_ontology_project( - client, chat_evaluation_ontology, - live_chat_evaluation_project_with_new_dataset, + live_chat_evaluation_project, offline_conversational_data_row, rand_gen, ): @@ -28,7 +25,7 @@ def test_create_chat_evaluation_ontology_project( assert classification.schema_id assert classification.feature_schema_id - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project assert project.model_setup_complete is None project.connect_ontology(ontology) @@ -36,28 +33,11 @@ def test_create_chat_evaluation_ontology_project( assert project.labeling_frontend().name == "Editor" assert project.ontology().name == ontology.name - with pytest.raises( - ValueError, - match="Cannot create 
batches for auto data generation projects", - ): - project.create_batch( - rand_gen(str), - [offline_conversational_data_row.uid], # sample of data row objects - ) - - with pytest.raises( - ValueError, - match="Cannot create batches for auto data generation projects", - ): - with patch( - "labelbox.schema.project.MAX_SYNC_BATCH_ROW_COUNT", new=0 - ): # force to async - project.create_batch( - rand_gen(str), - [ - offline_conversational_data_row.uid - ], # sample of data row objects - ) + batch = project.create_batch( + rand_gen(str), + [offline_conversational_data_row.uid], # sample of data row objects + ) + assert batch def test_create_chat_evaluation_ontology_project_existing_dataset( diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index 481385e75..485719575 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -21,24 +21,17 @@ from labelbox.schema.media_type import MediaType from labelbox.schema.task import Task -SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal" -TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt" -TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh" -CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb" -EXPECTED_METADATA_SCHEMA_IDS = [ - SPLIT_SCHEMA_ID, - TEST_SPLIT_ID, - TEXT_SCHEMA_ID, - CAPTURE_DT_SCHEMA_ID, -].sort() -CUSTOM_TEXT_SCHEMA_NAME = "custom_text" - @pytest.fixture -def mdo(client): +def mdo( + client, + constants, +): mdo = client.get_data_row_metadata_ontology() try: - mdo.create_schema(CUSTOM_TEXT_SCHEMA_NAME, DataRowMetadataKind.string) + mdo.create_schema( + constants["CUSTOM_TEXT_SCHEMA_NAME"], DataRowMetadataKind.string + ) except MalformedQueryException: # Do nothing if already exists pass @@ -93,26 +86,18 @@ def tile_content(): } -def make_metadata_fields(): - msg = "A message" - time = datetime.now(timezone.utc) - - fields = [ - DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID, value=TEST_SPLIT_ID), - 
DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time), - DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg), - ] - return fields - - -def make_metadata_fields_dict(): +@pytest.fixture +def make_metadata_fields_dict(constants): msg = "A message" time = datetime.now(timezone.utc) fields = [ - {"schema_id": SPLIT_SCHEMA_ID, "value": TEST_SPLIT_ID}, - {"schema_id": CAPTURE_DT_SCHEMA_ID, "value": time}, - {"schema_id": TEXT_SCHEMA_ID, "value": msg}, + { + "schema_id": constants["SPLIT_SCHEMA_ID"], + "value": constants["TEST_SPLIT_ID"], + }, + {"schema_id": constants["CAPTURE_DT_SCHEMA_ID"], "value": time}, + {"schema_id": constants["TEXT_SCHEMA_ID"], "value": msg}, ] return fields @@ -375,15 +360,22 @@ def test_create_data_row_with_invalid_input(dataset, image_url): dataset.create_data_row("asdf") -def test_create_data_row_with_metadata(mdo, dataset, image_url): +def test_create_data_row_with_metadata( + mdo, + dataset, + image_url, + make_metadata_fields, + constants, + make_metadata_fields_dict, +): client = dataset.client assert len(list(dataset.data_rows())) == 0 data_row = dataset.create_data_row( - row_data=image_url, metadata_fields=make_metadata_fields() + row_data=image_url, metadata_fields=make_metadata_fields ) - assert len(list(dataset.data_rows())) == 1 + assert len([dr for dr in dataset.data_rows()]) == 1 assert data_row.dataset() == dataset assert data_row.created_by() == client.get_user() assert data_row.organization() == client.get_organization() @@ -396,22 +388,24 @@ def test_create_data_row_with_metadata(mdo, dataset, image_url): metadata = data_row.metadata assert len(metadata_fields) == 3 assert len(metadata) == 3 - assert [ - m["schemaId"] for m in metadata_fields - ].sort() == EXPECTED_METADATA_SCHEMA_IDS + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() for m in metadata: assert mdo._parse_upsert(m) -def test_create_data_row_with_metadata_dict(mdo, dataset, 
image_url): +def test_create_data_row_with_metadata_dict( + mdo, dataset, image_url, constants, make_metadata_fields_dict +): client = dataset.client assert len(list(dataset.data_rows())) == 0 data_row = dataset.create_data_row( - row_data=image_url, metadata_fields=make_metadata_fields_dict() + row_data=image_url, metadata_fields=make_metadata_fields_dict ) - assert len(list(dataset.data_rows())) == 1 + assert len([dr for dr in dataset.data_rows()]) == 1 assert data_row.dataset() == dataset assert data_row.created_by() == client.get_user() assert data_row.organization() == client.get_organization() @@ -424,25 +418,36 @@ def test_create_data_row_with_metadata_dict(mdo, dataset, image_url): metadata = data_row.metadata assert len(metadata_fields) == 3 assert len(metadata) == 3 - assert [ - m["schemaId"] for m in metadata_fields - ].sort() == EXPECTED_METADATA_SCHEMA_IDS + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() for m in metadata: assert mdo._parse_upsert(m) -def test_create_data_row_with_invalid_metadata(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_row_with_invalid_metadata( + dataset, image_url, constants, make_metadata_fields +): + fields = make_metadata_fields # make the payload invalid by providing the same schema id more than once fields.append( - DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value="some msg") + DataRowMetadataField( + schema_id=constants["TEXT_SCHEMA_ID"], value="some msg" + ) ) with pytest.raises(ResourceCreationError): dataset.create_data_row(row_data=image_url, metadata_fields=fields) -def test_create_data_rows_with_metadata(mdo, dataset, image_url): +def test_create_data_rows_with_metadata( + mdo, + dataset, + image_url, + constants, + make_metadata_fields, + make_metadata_fields_dict, +): client = dataset.client assert len(list(dataset.data_rows())) == 0 @@ -451,22 +456,22 @@ def test_create_data_rows_with_metadata(mdo, dataset, 
image_url): { DataRow.row_data: image_url, DataRow.external_id: "row1", - DataRow.metadata_fields: make_metadata_fields(), + DataRow.metadata_fields: make_metadata_fields, }, { DataRow.row_data: image_url, DataRow.external_id: "row2", - "metadata_fields": make_metadata_fields(), + "metadata_fields": make_metadata_fields, }, { DataRow.row_data: image_url, DataRow.external_id: "row3", - DataRow.metadata_fields: make_metadata_fields_dict(), + DataRow.metadata_fields: make_metadata_fields_dict, }, { DataRow.row_data: image_url, DataRow.external_id: "row4", - "metadata_fields": make_metadata_fields_dict(), + "metadata_fields": make_metadata_fields_dict, }, ] ) @@ -488,9 +493,9 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): metadata = row.metadata assert len(metadata_fields) == 3 assert len(metadata) == 3 - assert [ - m["schemaId"] for m in metadata_fields - ].sort() == EXPECTED_METADATA_SCHEMA_IDS + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() for m in metadata: assert mdo._parse_upsert(m) @@ -505,14 +510,16 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): ], ) def test_create_data_rows_with_named_metadata_field_class( - test_function, metadata_obj_type, mdo, dataset, image_url + test_function, metadata_obj_type, mdo, dataset, image_url, constants ): row_with_metadata_field = { DataRow.row_data: image_url, DataRow.external_id: "row1", DataRow.metadata_fields: [ DataRowMetadataField(name="split", value="test"), - DataRowMetadataField(name=CUSTOM_TEXT_SCHEMA_NAME, value="hello"), + DataRowMetadataField( + name=constants["CUSTOM_TEXT_SCHEMA_NAME"], value="hello" + ), ], } @@ -521,7 +528,7 @@ def test_create_data_rows_with_named_metadata_field_class( DataRow.external_id: "row2", "metadata_fields": [ {"name": "split", "value": "test"}, - {"name": CUSTOM_TEXT_SCHEMA_NAME, "value": "hello"}, + {"name": constants["CUSTOM_TEXT_SCHEMA_NAME"], "value": "hello"}, ], } @@ 
-552,21 +559,26 @@ def create_data_row(data_rows): assert len(created_rows[0].metadata) == 2 metadata = created_rows[0].metadata - assert metadata[0].schema_id == SPLIT_SCHEMA_ID + assert metadata[0].schema_id == constants["SPLIT_SCHEMA_ID"] assert metadata[0].name == "test" assert metadata[0].value == mdo.reserved_by_name["split"]["test"].uid - assert metadata[1].name == CUSTOM_TEXT_SCHEMA_NAME + assert metadata[1].name == constants["CUSTOM_TEXT_SCHEMA_NAME"] assert metadata[1].value == "hello" assert ( - metadata[1].schema_id == mdo.custom_by_name[CUSTOM_TEXT_SCHEMA_NAME].uid + metadata[1].schema_id + == mdo.custom_by_name[constants["CUSTOM_TEXT_SCHEMA_NAME"]].uid ) -def test_create_data_rows_with_invalid_metadata(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_invalid_metadata( + dataset, image_url, constants, make_metadata_fields +): + fields = make_metadata_fields # make the payload invalid by providing the same schema id more than once fields.append( - DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value="some msg") + DataRowMetadataField( + schema_id=constants["TEXT_SCHEMA_ID"], value="some msg" + ) ) task = dataset.create_data_rows( @@ -577,13 +589,15 @@ def test_create_data_rows_with_invalid_metadata(dataset, image_url): assert task.status == "COMPLETE" assert len(task.failed_data_rows) == 1 assert ( - f"A schemaId can only be specified once per DataRow : [{TEXT_SCHEMA_ID}]" + f"A schemaId can only be specified once per DataRow : [{constants['TEXT_SCHEMA_ID']}]" in task.failed_data_rows[0]["message"] ) -def test_create_data_rows_with_metadata_missing_value(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_metadata_missing_value( + dataset, image_url, make_metadata_fields +): + fields = make_metadata_fields fields.append({"schemaId": "some schema id"}) with pytest.raises(ValueError) as exc: @@ -598,8 +612,10 @@ def test_create_data_rows_with_metadata_missing_value(dataset, 
image_url): ) -def test_create_data_rows_with_metadata_missing_schema_id(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_metadata_missing_schema_id( + dataset, image_url, make_metadata_fields +): + fields = make_metadata_fields fields.append({"value": "some value"}) with pytest.raises(ValueError) as exc: @@ -614,8 +630,10 @@ def test_create_data_rows_with_metadata_missing_schema_id(dataset, image_url): ) -def test_create_data_rows_with_metadata_wrong_type(dataset, image_url): - fields = make_metadata_fields() +def test_create_data_rows_with_metadata_wrong_type( + dataset, image_url, make_metadata_fields +): + fields = make_metadata_fields fields.append("Neither DataRowMetadataField or dict") with pytest.raises(ValueError) as exc: @@ -899,7 +917,11 @@ def test_does_not_update_not_provided_attachment_fields(data_row): assert attachment.attachment_type == "RAW_TEXT" -def test_create_data_rows_result(client, dataset, image_url): +def test_create_data_rows_result( + client, + dataset, + image_url, +): task = dataset.create_data_rows( [ { @@ -918,12 +940,14 @@ def test_create_data_rows_result(client, dataset, image_url): client.get_data_row(result["id"]) -def test_create_data_rows_local_file(dataset, sample_image): +def test_create_data_rows_local_file( + dataset, sample_image, make_metadata_fields +): task = dataset.create_data_rows( [ { DataRow.row_data: sample_image, - DataRow.metadata_fields: make_metadata_fields(), + DataRow.metadata_fields: make_metadata_fields, } ] ) diff --git a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py index 9d5e178b6..fc604ac8a 100644 --- a/libs/labelbox/tests/integration/test_labeling_service.py +++ b/libs/labelbox/tests/integration/test_labeling_service.py @@ -43,19 +43,18 @@ def test_request_labeling_service_moe_offline_project( def test_request_labeling_service_moe_project( - rand_gen, - 
live_chat_evaluation_project_with_new_dataset, + live_chat_evaluation_project_with_batch, chat_evaluation_ontology, model_config, ): - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project_with_batch project.connect_ontology(chat_evaluation_ontology) project.upsert_instructions("tests/integration/media/sample_pdf.pdf") labeling_service = project.get_labeling_service() with pytest.raises( - LabelboxError, + MalformedQueryException, match='[{"errorType":"PROJECT_MODEL_CONFIG","errorMessage":"Project model config is not completed"}]', ): labeling_service.request() @@ -77,5 +76,5 @@ def test_request_labeling_service_incomplete_requirements(ontology, project): ): # No labeling service by default labeling_service.request() project.connect_ontology(ontology) - with pytest.raises(LabelboxError): + with pytest.raises(MalformedQueryException): labeling_service.request() diff --git a/libs/labelbox/tests/integration/test_mmc_data_rows.py b/libs/labelbox/tests/integration/test_mmc_data_rows.py new file mode 100644 index 000000000..2fa7bdd1b --- /dev/null +++ b/libs/labelbox/tests/integration/test_mmc_data_rows.py @@ -0,0 +1,82 @@ +import json +import random + +import pytest + +from labelbox.schema.data_row_payload_templates import ModelEvaluationTemplate + + +@pytest.fixture +def mmc_data_row(dataset): + data = ModelEvaluationTemplate() + + content_all = data.model_dump(exclude_none=True) + task = dataset.create_data_rows([content_all]) + task.wait_till_done() + assert task.status == "COMPLETE" + + data_row = list(dataset.data_rows())[0] + + yield data_row + + data_row.delete() + + +@pytest.fixture +def mmc_data_row_all(dataset, make_metadata_fields, embedding): + data = ModelEvaluationTemplate() + data.row_data.rootMessageIds = ["root1"] + data.row_data.global_key = "global_key" + vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)] + data.embeddings = [{"embedding_id": embedding.id, "vector": vector}] + 
data.metadata_fields = make_metadata_fields + data.attachments = [{"type": "RAW_TEXT", "value": "attachment value"}] + + content_all = data.model_dump(exclude_none=True) + task = dataset.create_data_rows([content_all]) + task.wait_till_done() + assert task.status == "COMPLETE" + + data_row = list(dataset.data_rows())[0] + + yield data_row + + data_row.delete() + + +def test_mmc(mmc_data_row): + data_row = mmc_data_row + assert json.loads(data_row.row_data) == { + "type": "application/vnd.labelbox.conversational.model-chat-evaluation", + "draft": True, + "rootMessageIds": [], + "actors": {}, + "messages": {}, + "version": 2, + } + + +def test_mmc_all(mmc_data_row_all, embedding, constants): + data_row = mmc_data_row_all + assert json.loads(data_row.row_data) == { + "type": "application/vnd.labelbox.conversational.model-chat-evaluation", + "draft": True, + "rootMessageIds": ["root1"], + "actors": {}, + "messages": {}, + "version": 2, + "globalKey": "global_key", + } + + metadata_fields = data_row.metadata_fields + metadata = data_row.metadata + assert len(metadata_fields) == 3 + assert len(metadata) == 3 + assert [m["schemaId"] for m in metadata_fields].sort() == constants[ + "EXPECTED_METADATA_SCHEMA_IDS" + ].sort() + + attachments = list(data_row.attachments()) + assert len(attachments) == 1 + + assert embedding.get_imported_vector_count() == 1 diff --git a/libs/labelbox/tests/integration/test_project_model_config.py b/libs/labelbox/tests/integration/test_project_model_config.py index f86bbb38e..f1646dfc0 100644 --- a/libs/labelbox/tests/integration/test_project_model_config.py +++ b/libs/labelbox/tests/integration/test_project_model_config.py @@ -2,10 +2,8 @@ from lbox.exceptions import ResourceNotFoundError -def test_add_single_model_config( - live_chat_evaluation_project_with_new_dataset, model_config -): - configured_project = live_chat_evaluation_project_with_new_dataset +def test_add_single_model_config(live_chat_evaluation_project, model_config): + 
configured_project = live_chat_evaluation_project project_model_config_id = configured_project.add_model_config( model_config.uid ) @@ -22,11 +20,11 @@ def test_add_single_model_config( def test_add_multiple_model_config( client, rand_gen, - live_chat_evaluation_project_with_new_dataset, + live_chat_evaluation_project, model_config, valid_model_id, ): - configured_project = live_chat_evaluation_project_with_new_dataset + configured_project = live_chat_evaluation_project second_model_config = client.create_model_config( rand_gen(str), valid_model_id, {"param": "value"} ) @@ -52,9 +50,9 @@ def test_add_multiple_model_config( def test_delete_project_model_config( - live_chat_evaluation_project_with_new_dataset, model_config + live_chat_evaluation_project, model_config ): - configured_project = live_chat_evaluation_project_with_new_dataset + configured_project = live_chat_evaluation_project assert configured_project.delete_project_model_config( configured_project.add_model_config(model_config.uid) ) diff --git a/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py b/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py index 8872a27f4..30e179028 100644 --- a/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py +++ b/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py @@ -3,9 +3,9 @@ def test_live_chat_evaluation_project( - live_chat_evaluation_project_with_new_dataset, model_config + live_chat_evaluation_project, model_config ): - project = live_chat_evaluation_project_with_new_dataset + project = live_chat_evaluation_project project.set_project_model_setup_complete() assert bool(project.model_setup_complete) is True @@ -18,9 +18,9 @@ def test_live_chat_evaluation_project( def test_live_chat_evaluation_project_delete_cofig( - live_chat_evaluation_project_with_new_dataset, model_config + live_chat_evaluation_project, model_config ): - project = live_chat_evaluation_project_with_new_dataset + 
project = live_chat_evaluation_project project_model_config_id = project.add_model_config(model_config.uid) assert project_model_config_id