From 4286ec4e51c3a136a96f6bb36c039b04fe400527 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Sat, 14 Sep 2024 20:43:38 -0500 Subject: [PATCH 1/7] finish remaining integration tests --- .../test_bulk_import_request.py | 48 ----------- .../data/annotation_import/test_data_types.py | 83 ------------------- .../test_generic_data_types.py | 70 ---------------- .../test_mea_prediction_import.py | 70 ++++++++++++++-- .../ndjson/test_generic_data_row_data.py | 79 ++++++++++++++++++ 5 files changed, 144 insertions(+), 206 deletions(-) delete mode 100644 libs/labelbox/tests/data/annotation_import/test_data_types.py create mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py index 9abae1422..b2289503e 100644 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py @@ -1,38 +1,11 @@ -from unittest.mock import patch import uuid from labelbox import parser, Project -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest -import random -from labelbox.data.annotation_types.annotation import ObjectAnnotation -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - ClassificationAnnotation, - ClassificationAnswer, - Radio, -) -from labelbox.data.annotation_types.data.video import VideoData -from labelbox.data.annotation_types.geometry.point import Point -from labelbox.data.annotation_types.geometry.rectangle import ( - Rectangle, - RectangleUnit, -) -from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.data.text import TextData -from labelbox.data.annotation_types.ner import ( - DocumentEntity, - DocumentTextSelection, -) -from labelbox.data.annotation_types.video import VideoObjectAnnotation from labelbox.data.serialization import NDJsonConverter from labelbox.exceptions import MALValidationError, UuidError from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.annotation_import import LabelImport, MALPredictionImport -from labelbox.schema.media_type import MediaType """ - Here we only want to check that the uploads are calling the validation @@ -87,27 +60,6 @@ def test_create_from_objects( ) -def test_create_from_label_objects( - module_project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - labels = list(NDJsonConverter.deserialize(predictions)) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=labels - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - normalized_predictions = list(NDJsonConverter.serialize(labels)) - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, normalized_predictions - ) - - def test_create_from_local_file( tmp_path, predictions, module_project, annotation_import_test_helpers ): diff --git a/libs/labelbox/tests/data/annotation_import/test_data_types.py 
b/libs/labelbox/tests/data/annotation_import/test_data_types.py deleted file mode 100644 index 1e45295ef..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_data_types.py +++ /dev/null @@ -1,83 +0,0 @@ -import pytest - -from labelbox.data.annotation_types.data import ( - AudioData, - ConversationData, - DocumentData, - HTMLData, - ImageData, - TextData, -) -from labelbox.data.serialization import NDJsonConverter -from labelbox.data.annotation_types.data.video import VideoData - -import labelbox.types as lb_types -from labelbox.schema.media_type import MediaType - -# Unit test for label based on data type. -# TODO: Dicom removed it is unstable when you deserialize and serialize on label import. If we intend to keep this library this needs add generic data types tests work with this data type. -# TODO: add MediaType.LLMPromptResponseCreation(data gen) once supported and llm human preference once media type is added - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, AudioData), - (MediaType.Html, HTMLData), - (MediaType.Image, ImageData), - (MediaType.Text, TextData), - (MediaType.Video, VideoData), - (MediaType.Conversational, ConversationData), - (MediaType.Document, DocumentData), - ], -) -def test_data_row_type_by_data_row_id( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_datarow_id, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = lb_types.Label( - data=data_type_class(uid=hardcoded_datarow_id()), - annotations=label.annotations, - ) - - assert data_label.data.uid == label.data.uid - assert label.annotations == data_label.annotations - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, AudioData), - (MediaType.Html, HTMLData), - (MediaType.Image, ImageData), - (MediaType.Text, TextData), - (MediaType.Video, VideoData), - (MediaType.Conversational, ConversationData), - (MediaType.Document, DocumentData), - ], -) -def test_data_row_type_by_global_key( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_global_key, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = lb_types.Label( - data=data_type_class(global_key=hardcoded_global_key()), - annotations=label.annotations, - ) - - assert data_label.data.global_key == label.data.global_key - assert label.annotations == data_label.annotations diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py index f8f0c449a..18385c9d9 100644 --- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py +++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py @@ -28,76 +28,6 @@ def validate_iso_format(date_string: str): assert parsed_t.second is not None -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, GenericDataRowData), - (MediaType.Html, GenericDataRowData), - (MediaType.Image, GenericDataRowData), - (MediaType.Text, GenericDataRowData), - (MediaType.Video, GenericDataRowData), - (MediaType.Conversational, GenericDataRowData), - (MediaType.Document, GenericDataRowData), - (MediaType.LLMPromptResponseCreation, 
GenericDataRowData), - (MediaType.LLMPromptCreation, GenericDataRowData), - (OntologyKind.ResponseCreation, GenericDataRowData), - ], -) -def test_generic_data_row_type_by_data_row_id( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_datarow_id, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = Label( - data=data_type_class(uid=hardcoded_datarow_id()), - annotations=label.annotations, - ) - - assert data_label.data.uid == label.data.uid - assert label.annotations == data_label.annotations - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, GenericDataRowData), - (MediaType.Html, GenericDataRowData), - (MediaType.Image, GenericDataRowData), - (MediaType.Text, GenericDataRowData), - (MediaType.Video, GenericDataRowData), - (MediaType.Conversational, GenericDataRowData), - (MediaType.Document, GenericDataRowData), - # (MediaType.LLMPromptResponseCreation, GenericDataRowData), - # (MediaType.LLMPromptCreation, GenericDataRowData), - (OntologyKind.ResponseCreation, GenericDataRowData), - ], -) -def test_generic_data_row_type_by_global_key( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_global_key, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = Label( - data=data_type_class(global_key=hardcoded_global_key()), - annotations=label.annotations, - ) - - assert data_label.data.global_key == label.data.global_key - assert label.annotations == data_label.annotations - - @pytest.mark.parametrize( "configured_project, media_type", [ diff --git a/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py b/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py index fccca2a3f..5f47975ad 100644 --- a/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py +++ b/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py @@ -1,5 +1,19 @@ import uuid from labelbox import parser +from labelbox.data.annotation_types.annotation import ObjectAnnotation +from labelbox.data.annotation_types.classification.classification import ( + ClassificationAnnotation, + ClassificationAnswer, + Radio, +) +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.annotation_types.geometry.line import Line +from labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.polygon import Polygon +from labelbox.data.annotation_types.geometry.rectangle import Rectangle +from labelbox.data.annotation_types.label import Label import pytest from labelbox import ModelRun @@ -193,14 +207,60 @@ def test_create_from_label_objects( annotation_import_test_helpers, ): name = str(uuid.uuid4()) - use_data_row_ids = [ + use_data_row_id = [ p["dataRow"]["id"] for p in object_predictions_for_annotation_import ] - model_run_with_data_rows.upsert_data_rows(use_data_row_ids) - predictions = list( - NDJsonConverter.deserialize(object_predictions_for_annotation_import) - ) + model_run_with_data_rows.upsert_data_rows(use_data_row_id) + + predictions = [] + for data_row_id in use_data_row_id: + predictions.append( + Label( + data=GenericDataRowData( + 
uid=data_row_id, + ), + annotations=[ + ObjectAnnotation( + name="polygon", + extra={ + "uuid": "6d10fa30-3ea0-4e6c-bbb1-63f5c29fe3e4", + }, + value=Polygon( + points=[ + Point(x=147.692, y=118.154), + Point(x=142.769, y=104.923), + Point(x=57.846, y=118.769), + Point(x=28.308, y=169.846), + Point(x=147.692, y=118.154), + ], + ), + ), + ObjectAnnotation( + name="bbox", + extra={ + "uuid": "15b7138f-4bbc-42c5-ae79-45d87b0a3b2a", + }, + value=Rectangle( + start=Point(x=58.0, y=48.0), + end=Point(x=70.0, y=113.0), + ), + ), + ObjectAnnotation( + name="polyline", + extra={ + "uuid": "cf4c6df9-c39c-4fbc-9541-470f6622978a", + }, + value=Line( + points=[ + Point(x=147.692, y=118.154), + Point(x=150.692, y=160.154), + ], + ), + ), + ], + ), + ) annotation_import = model_run_with_data_rows.add_predictions( name=name, predictions=predictions diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py b/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py new file mode 100644 index 000000000..0dc4c21c0 --- /dev/null +++ b/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py @@ -0,0 +1,79 @@ +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ClassificationAnnotation, Text + + +def test_generic_data_row_global_key(): + label_1 = Label( + data=GenericDataRowData(global_key="test"), + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + label_2 = Label( + data={"global_key": "test"}, + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + + expected_result = [ + { + "answer": "sample text", + "dataRow": {"globalKey": "test"}, + "name": "free_text", + "uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0", + } + ] + assert ( + list(NDJsonConverter.serialize([label_1])) + == list(NDJsonConverter.serialize([label_2])) + == expected_result + ) + + +def test_generic_data_row_id(): + label_1 = Label( + data=GenericDataRowData(uid="test"), + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + label_2 = Label( + data={"uid": "test"}, + annotations=[ + ClassificationAnnotation( + name="free_text", + value=Text(answer="sample text"), + extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"}, + ) + ], + ) + + expected_result = [ + { + "answer": "sample text", + "dataRow": {"id": "test"}, + "name": "free_text", + "uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0", + } + ] + assert ( + list(NDJsonConverter.serialize([label_1])) + == list(NDJsonConverter.serialize([label_2])) + == expected_result + ) From ed9ba18689cd6f224305da17f68af1ca5d40e8fd Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Sat, 14 Sep 2024 20:48:07 -0500 Subject: [PATCH 2/7] Removed deserialize and subregistry logic --- .../data/serialization/ndjson/base.py | 12 ---- .../serialization/ndjson/classification.py | 13 ++-- .../data/serialization/ndjson/converter.py | 14 ---- .../data/serialization/ndjson/label.py | 64 +------------------ .../data/serialization/ndjson/metric.py | 5 +- .../labelbox/data/serialization/ndjson/mmc.py | 4 +-
.../data/serialization/ndjson/objects.py | 49 +++++--------- .../data/serialization/ndjson/relationship.py | 4 +- 8 files changed, 28 insertions(+), 137 deletions(-) diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py index 75ebdc100..d8d8cd36f 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py @@ -8,18 +8,6 @@ from ....annotated_types import Cuid -subclass_registry = {} - - -class _SubclassRegistryBase(BaseModel): - model_config = ConfigDict(extra="allow") - - def __init_subclass__(cls, **kwargs): - super().__init_subclass__(**kwargs) - if cls.__name__ != "NDAnnotation": - with threading.Lock(): - subclass_registry[cls.__name__] = cls - class DataRow(_CamelCaseMixin): id: Optional[str] = None diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py index b127c4a90..2c3215265 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py @@ -30,7 +30,6 @@ model_serializer, ) from pydantic.alias_generators import to_camel -from .base import _SubclassRegistryBase class NDAnswer(ConfidenceMixin, CustomMetricsMixin): @@ -224,7 +223,7 @@ def from_common( # ====== End of subclasses -class NDText(NDAnnotation, NDTextSubclass, _SubclassRegistryBase): +class NDText(NDAnnotation, NDTextSubclass): @classmethod def from_common( cls, @@ -249,9 +248,7 @@ def from_common( ) -class NDChecklist( - NDAnnotation, NDChecklistSubclass, VideoSupported, _SubclassRegistryBase -): +class NDChecklist(NDAnnotation, NDChecklistSubclass, VideoSupported): @model_serializer(mode="wrap") def serialize_model(self, handler): res = handler(self) @@ -298,9 +295,7 @@ def from_common( ) -class NDRadio( - NDAnnotation, NDRadioSubclass, VideoSupported, _SubclassRegistryBase -): +class NDRadio(NDAnnotation, NDRadioSubclass, VideoSupported): @classmethod def from_common( cls, @@ -343,7 +338,7 @@ def serialize_model(self, handler): return res -class NDPromptText(NDAnnotation, NDPromptTextSubclass, _SubclassRegistryBase): +class NDPromptText(NDAnnotation, NDPromptTextSubclass): @classmethod def from_common( cls, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py index 01ab8454a..8176d7862 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py @@ -26,20 +26,6 @@ class NDJsonConverter: - @staticmethod - def deserialize(json_data: Iterable[Dict[str, Any]]) -> LabelGenerator: - """ - Converts ndjson data (prediction import format) into the common labelbox format. - - Args: - json_data: An iterable representing the ndjson data - Returns: - LabelGenerator containing the ndjson data. 
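# Usage sketch (not SDK code; `predictions` stands in for a list of NDJSON
# annotation dicts, as in the tests deleted in patch 1) of the call pattern
# this patch removes:
labels = list(NDJsonConverter.deserialize(predictions))
# NDJsonConverter.serialize remains the supported direction after this change.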
- """ - data = NDLabel(**{"annotations": copy.copy(json_data)}) - res = data.to_common() - return res - @staticmethod def serialize( labels: LabelCollection, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 18134a228..7039ae834 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -46,7 +46,6 @@ from .relationship import NDRelationship from .base import DataRow from pydantic import BaseModel, ValidationError -from .base import subclass_registry, _SubclassRegistryBase from pydantic_core import PydanticUndefined from contextlib import suppress @@ -67,68 +66,7 @@ class NDLabel(BaseModel): - annotations: List[_SubclassRegistryBase] - - def __init__(self, **kwargs): - # NOTE: Deserialization of subclasses in pydantic is difficult, see here https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83 - # Below implements the subclass registry as mentioned in the article. The python dicts we pass in can be missing certain fields - # we essentially have to infer the type against all sub classes that have the _SubclasssRegistryBase inheritance. - # It works by checking if the keys of our annotations we are missing in matches any required subclass. - # More keys are prioritized over less keys (closer match). This is used when importing json to our base models not a lot of customer workflows - # depend on this method but this works for all our existing tests with the bonus of added validation. (no subclass found it throws an error) - - for index, annotation in enumerate(kwargs["annotations"]): - if isinstance(annotation, dict): - item_annotation_keys = annotation.keys() - key_subclass_combos = defaultdict(list) - for subclass in subclass_registry.values(): - # Get all required keys from subclass - annotation_keys = [] - for k, field in subclass.model_fields.items(): - if field.default == PydanticUndefined and k != "uuid": - if ( - hasattr(field, "alias") - and field.alias in item_annotation_keys - ): - annotation_keys.append(field.alias) - elif ( - hasattr(field, "validation_alias") - and field.validation_alias - in item_annotation_keys - ): - annotation_keys.append(field.validation_alias) - else: - annotation_keys.append(k) - - key_subclass_combos[subclass].extend(annotation_keys) - - # Sort by subclass that has the most keys i.e. 
the one with the most keys that matches is most likely our subclass - key_subclass_combos = dict( - sorted( - key_subclass_combos.items(), - key=lambda x: len(x[1]), - reverse=True, - ) - ) - - for subclass, key_subclass_combo in key_subclass_combos.items(): - # Choose the keys from our dict we supplied that matches the required keys of a subclass - check_required_keys = all( - key in list(item_annotation_keys) - for key in key_subclass_combo - ) - if check_required_keys: - # Keep trying subclasses until we find one that has valid values (does not throw an validation error) - with suppress(ValidationError): - annotation = subclass(**annotation) - break - if isinstance(annotation, dict): - raise ValueError( - f"Could not find subclass for fields: {item_annotation_keys}" - ) - - kwargs["annotations"][index] = annotation - super().__init__(**kwargs) + annotations: AnnotationType class _Relationship(BaseModel): """This object holds information about the relationship""" diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py index 60d538b19..b28e575cf 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py @@ -15,7 +15,6 @@ ConfusionMatrixMetricConfidenceValue, ) from pydantic import ConfigDict, model_serializer -from .base import _SubclassRegistryBase class BaseNDMetric(NDJsonBase): @@ -33,7 +32,7 @@ def serialize_model(self, handler): return res -class NDConfusionMatrixMetric(BaseNDMetric, _SubclassRegistryBase): +class NDConfusionMatrixMetric(BaseNDMetric): metric_value: Union[ ConfusionMatrixMetricValue, ConfusionMatrixMetricConfidenceValue ] @@ -65,7 +64,7 @@ def from_common( ) -class NDScalarMetric(BaseNDMetric, _SubclassRegistryBase): +class NDScalarMetric(BaseNDMetric): metric_value: Union[ScalarMetricValue, ScalarMetricConfidenceValue] metric_name: Optional[str] = None aggregation: Optional[ScalarMetricAggregation] = ( diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py index 4be24f683..74d185f45 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py @@ -2,7 +2,7 @@ from labelbox.utils import _CamelCaseMixin -from .base import _SubclassRegistryBase, DataRow, NDAnnotation +from .base import DataRow, NDAnnotation from ...annotation_types.mmc import ( MessageSingleSelectionTask, MessageMultiSelectionTask, @@ -20,7 +20,7 @@ class MessageTaskData(_CamelCaseMixin): ] -class NDMessageTask(NDAnnotation, _SubclassRegistryBase): +class NDMessageTask(NDAnnotation): message_evaluation_task: MessageTaskData def to_common(self) -> MessageEvaluationTaskAnnotation: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py index a1465fa06..91abface6 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py @@ -52,7 +52,7 @@ NDSubclassification, NDSubclassificationType, ) -from .base import DataRow, NDAnnotation, NDJsonBase, _SubclassRegistryBase +from .base import DataRow, NDAnnotation, NDJsonBase from pydantic import BaseModel @@ -81,9 +81,7 @@ class Bbox(BaseModel): width: float -class NDPoint( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class 
NDPoint(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): point: _Point def to_common(self) -> Point: @@ -114,7 +112,7 @@ def from_common( ) -class NDFramePoint(VideoSupported, _SubclassRegistryBase): +class NDFramePoint(VideoSupported): point: _Point classifications: List[NDSubclassificationType] = [] @@ -148,9 +146,7 @@ def from_common( ) -class NDLine( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDLine(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): line: List[_Point] def to_common(self) -> Line: @@ -181,7 +177,7 @@ def from_common( ) -class NDFrameLine(VideoSupported, _SubclassRegistryBase): +class NDFrameLine(VideoSupported): line: List[_Point] classifications: List[NDSubclassificationType] = [] @@ -215,7 +211,7 @@ def from_common( ) -class NDDicomLine(NDFrameLine, _SubclassRegistryBase): +class NDDicomLine(NDFrameLine): def to_common( self, name: str, @@ -234,9 +230,7 @@ def to_common( ) -class NDPolygon( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDPolygon(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): polygon: List[_Point] def to_common(self) -> Polygon: @@ -267,9 +261,7 @@ def from_common( ) -class NDRectangle( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDRectangle(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): bbox: Bbox def to_common(self) -> Rectangle: @@ -313,7 +305,7 @@ def from_common( ) -class NDDocumentRectangle(NDRectangle, _SubclassRegistryBase): +class NDDocumentRectangle(NDRectangle): page: int unit: str @@ -360,7 +352,7 @@ def from_common( ) -class NDFrameRectangle(VideoSupported, _SubclassRegistryBase): +class NDFrameRectangle(VideoSupported): bbox: Bbox classifications: List[NDSubclassificationType] = [] @@ -496,7 +488,7 @@ def to_common( ] -class NDSegments(NDBaseObject, _SubclassRegistryBase): +class NDSegments(NDBaseObject): segments: List[NDSegment] def to_common(self, name: str, feature_schema_id: Cuid): @@ -532,7 +524,7 @@ def from_common( ) -class NDDicomSegments(NDBaseObject, DicomSupported, _SubclassRegistryBase): +class NDDicomSegments(NDBaseObject, DicomSupported): segments: List[NDDicomSegment] def to_common(self, name: str, feature_schema_id: Cuid): @@ -580,9 +572,7 @@ class _PNGMask(BaseModel): png: str -class NDMask( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDMask(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): mask: Union[_URIMask, _PNGMask] def to_common(self) -> Mask: @@ -646,7 +636,6 @@ class NDVideoMasks( NDJsonBase, ConfidenceMixin, CustomMetricsNotSupportedMixin, - _SubclassRegistryBase, ): masks: NDVideoMasksFramesInstances @@ -678,7 +667,7 @@ def from_common(cls, annotation, data): ) -class NDDicomMasks(NDVideoMasks, DicomSupported, _SubclassRegistryBase): +class NDDicomMasks(NDVideoMasks, DicomSupported): def to_common(self) -> DICOMMaskAnnotation: return DICOMMaskAnnotation( frames=self.masks.frames, @@ -702,9 +691,7 @@ class Location(BaseModel): end: int -class NDTextEntity( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDTextEntity(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): location: Location def to_common(self) -> TextEntity: @@ -738,9 +725,7 @@ def from_common( ) -class NDDocumentEntity( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDDocumentEntity(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): name: str text_selections: List[DocumentTextSelection] @@ 
-774,7 +759,7 @@ def from_common( ) -class NDConversationEntity(NDTextEntity, _SubclassRegistryBase): +class NDConversationEntity(NDTextEntity): message_id: str def to_common(self) -> ConversationEntity: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py index fbea7e477..94c8e9879 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py @@ -5,7 +5,7 @@ from ...annotation_types.relationship import RelationshipAnnotation from ...annotation_types.relationship import Relationship from .objects import NDObjectType -from .base import DataRow, _SubclassRegistryBase +from .base import DataRow SUPPORTED_ANNOTATIONS = NDObjectType @@ -16,7 +16,7 @@ class _Relationship(BaseModel): type: str -class NDRelationship(NDAnnotation, _SubclassRegistryBase): +class NDRelationship(NDAnnotation): relationship: _Relationship @staticmethod From fd5c3917d179696aafba7c296a99508de24163bc Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:15:30 -0500 Subject: [PATCH 3/7] temp added v6 to workflows so test kick off --- .github/workflows/lbox-develop.yml | 32 ++++++++--------- .github/workflows/python-package-develop.yml | 38 ++++++++++---------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/workflows/lbox-develop.yml b/.github/workflows/lbox-develop.yml index ba1e4f34e..497ef4afb 100644 --- a/.github/workflows/lbox-develop.yml +++ b/.github/workflows/lbox-develop.yml @@ -2,9 +2,9 @@ name: LBox Develop on: push: - branches: [develop] + branches: [develop, v6] pull_request: - branches: [develop] + branches: [develop, v6] concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -27,7 +27,7 @@ jobs: - uses: dorny/paths-filter@v3 id: filter with: - list-files: 'json' + list-files: "json" filters: | lbox: - 'libs/lbox*/**' @@ -36,16 +36,16 @@ jobs: with: files-changed: ${{ steps.filter.outputs.lbox_files }} build: - needs: ['path-filter'] + needs: ["path-filter"] if: ${{ needs.path-filter.outputs.lbox == 'true' }} runs-on: ubuntu-latest strategy: fail-fast: false - matrix: - include: ${{ fromJSON(needs.path-filter.outputs.test-matrix) }} + matrix: + include: ${{ fromJSON(needs.path-filter.outputs.test-matrix) }} concurrency: group: lbox-staging-${{ matrix.python-version }}-${{ matrix.package }} - cancel-in-progress: false + cancel-in-progress: false steps: - uses: actions/checkout@v4 with: @@ -67,19 +67,19 @@ jobs: env: LABELBOX_TEST_API_KEY: ${{ secrets[matrix.api-key] }} DA_GCP_LABELBOX_API_KEY: ${{ secrets[matrix.da-test-key] }} - LABELBOX_TEST_ENVIRON: 'staging' + LABELBOX_TEST_ENVIRON: "staging" run: rye run integration test-pypi: runs-on: ubuntu-latest - needs: ['build', 'path-filter'] + needs: ["build", "path-filter"] if: ${{ needs.path-filter.outputs.lbox == 'true' }} strategy: fail-fast: false - matrix: + matrix: include: ${{ fromJSON(needs.path-filter.outputs.package-matrix) }} - environment: + environment: name: Test-PyPI-${{ matrix.package }} - url: 'https://test.pypi.org/p/${{ matrix.package }}' + url: "https://test.pypi.org/p/${{ matrix.package }}" permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write @@ -90,7 +90,7 @@ jobs: - uses: ./.github/actions/python-package-shared-setup with: rye-version: ${{ vars.RYE_VERSION }} - python-version: '3.8' + python-version: "3.8" - name: Create 
build id: create-build working-directory: libs/${{ matrix.package }} @@ -107,11 +107,11 @@ jobs: repository-url: https://test.pypi.org/legacy/ test-container: runs-on: ubuntu-latest - needs: ['build', 'path-filter'] + needs: ["build", "path-filter"] if: ${{ needs.path-filter.outputs.lbox == 'true' }} strategy: fail-fast: false - matrix: + matrix: include: ${{ fromJSON(needs.path-filter.outputs.package-matrix) }} permissions: # IMPORTANT: this permission is mandatory for trusted publishing @@ -163,4 +163,4 @@ jobs: - name: Build and push (Pull Request) Output if: github.event_name == 'pull_request' run: | - echo "ghcr.io/labelbox/${{ matrix.package }}:${{ github.sha }}" >> "$GITHUB_STEP_SUMMARY" \ No newline at end of file + echo "ghcr.io/labelbox/${{ matrix.package }}:${{ github.sha }}" >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/workflows/python-package-develop.yml b/.github/workflows/python-package-develop.yml index 05eff5dc4..dcec45103 100644 --- a/.github/workflows/python-package-develop.yml +++ b/.github/workflows/python-package-develop.yml @@ -2,9 +2,9 @@ name: Labelbox Python SDK Staging (Develop) on: push: - branches: [develop] + branches: [develop, v6] pull_request: - branches: [develop] + branches: [develop, v6] concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -37,10 +37,10 @@ jobs: uses: actions/checkout@v2 with: ref: ${{ github.event.repository.default_branch }} - + - name: Fetch tags run: git fetch --tags - + - name: Get Latest SDK versions id: get_sdk_versions run: | @@ -50,9 +50,9 @@ jobs: exit 1 fi echo "sdk_versions=$sdk_versions" - echo "sdk_versions=$sdk_versions" >> $GITHUB_OUTPUT + echo "sdk_versions=$sdk_versions" >> $GITHUB_OUTPUT build: - needs: ['path-filter', 'get_sdk_versions'] + needs: ["path-filter", "get_sdk_versions"] if: ${{ needs.path-filter.outputs.labelbox == 'true' }} strategy: fail-fast: false @@ -84,15 +84,15 @@ jobs: da-test-key: ${{ matrix.da-test-key }} sdk-version: ${{ matrix.sdk-version }} fixture-profile: true - test-env: 'staging' + test-env: "staging" secrets: inherit test-pypi: runs-on: ubuntu-latest - needs: ['path-filter'] + needs: ["path-filter"] continue-on-error: true - environment: + environment: name: Test-PyPI - url: 'https://test.pypi.org/p/labelbox-test' + url: "https://test.pypi.org/p/labelbox-test" permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write @@ -103,7 +103,7 @@ jobs: - uses: ./.github/actions/python-package-shared-setup with: rye-version: ${{ vars.RYE_VERSION }} - python-version: '3.8' + python-version: "3.8" - name: Create build id: create-build working-directory: libs/labelbox @@ -119,8 +119,8 @@ jobs: packages-dir: dist/ repository-url: https://test.pypi.org/legacy/ test-container: - runs-on: ubuntu-latest - needs: ['path-filter'] + runs-on: ubuntu-latest + needs: ["path-filter"] continue-on-error: true permissions: # IMPORTANT: this permission is mandatory for trusted publishing @@ -138,14 +138,14 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - + - name: Log in to the Container registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - + - name: Build and push (Develop) if: github.event_name == 'push' uses: docker/build-push-action@v5 @@ -154,7 +154,7 @@ jobs: file: ./libs/labelbox/Dockerfile github-token: ${{ secrets.GITHUB_TOKEN }} push: true - + platforms: | linux/amd64 linux/arm64 @@ -162,7 +162,7 @@ jobs: tags: | ${{ env.CONTAINER_IMAGE }}:develop ${{ 
env.CONTAINER_IMAGE }}:${{ github.sha }} - + - name: Build and push (Pull Request) if: github.event_name == 'pull_request' uses: docker/build-push-action@v5 @@ -171,10 +171,10 @@ jobs: file: ./libs/labelbox/Dockerfile github-token: ${{ secrets.GITHUB_TOKEN }} push: true - + platforms: | linux/amd64 linux/arm64 tags: | - ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} \ No newline at end of file + ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} From 5f4f73356a991da613be6f4e98ef321f275199b8 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:19:09 -0500 Subject: [PATCH 4/7] fixed linting tests --- .../schema/labeling_service_dashboard.py | 38 ++++--- .../unit/test_labeling_service_dashboard.py | 102 +++++++++--------- 2 files changed, 76 insertions(+), 64 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py index 2052897f6..c5e1fa11e 100644 --- a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py +++ b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py @@ -84,7 +84,8 @@ def __init__(self, **kwargs): super().__init__(**kwargs) if not self.client.enable_experimental: raise RuntimeError( - "Please enable experimental in client to use LabelingService") + "Please enable experimental in client to use LabelingService" + ) @property def service_type(self): @@ -97,20 +98,28 @@ def service_type(self): if self.editor_task_type is None: return sentence_case(self.media_type.value) - if (self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation - and self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Offline chat evaluation" - if (self.editor_task_type == EditorTaskType.ModelChatEvaluation and - self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.ModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Live chat evaluation" - if (self.editor_task_type == EditorTaskType.ResponseCreation and - self.media_type == MediaType.Text): + if ( + self.editor_task_type == EditorTaskType.ResponseCreation + and self.media_type == MediaType.Text + ): return "Response creation" - if (self.media_type == MediaType.LLMPromptCreation or - self.media_type == MediaType.LLMPromptResponseCreation): + if ( + self.media_type == MediaType.LLMPromptCreation + or self.media_type == MediaType.LLMPromptResponseCreation + ): return "Prompt response creation" return sentence_case(self.media_type.value) @@ -154,7 +163,8 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) else: template = Template( """query SearchProjectsPyApi($$first: Int, $$from: String) { @@ -164,11 +174,13 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) query_str = template.substitute( labeling_dashboard_selections=GRAPHQL_QUERY_SELECTIONS, search_query=build_search_filter(search_query) - if search_query else None, + if search_query + else None, ) params: Dict[str, Union[str, int]] = {} @@ -186,7 +198,7 @@ def convert_to_labeling_service_dashboard(client, data): experimental=True, ) - @model_validator(mode='before') + @model_validator(mode="before") def convert_boost_data(cls, data): if "boostStatus" in data: data["status"] = LabelingServiceStatus(data.pop("boostStatus")) diff --git a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py 
b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py index 8ecdef2f1..061efbadf 100644 --- a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py +++ b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py @@ -5,23 +5,23 @@ def test_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count is None @@ -29,23 +29,23 @@ def test_no_tasks_remaining_count(): def test_tasks_remaining_count_exists(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 1, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 1, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 1 @@ -53,23 +53,23 @@ def test_tasks_remaining_count_exists(): def test_tasks_total_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 1, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 1, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 
1, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 1, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 0 From 54909b4184778d323d348530ce3cbe41418ac4a0 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:31:28 -0500 Subject: [PATCH 5/7] removed formatting --- .github/workflows/lbox-develop.yml | 28 ++++++++-------- .github/workflows/python-package-develop.yml | 34 ++++++++++---------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/lbox-develop.yml b/.github/workflows/lbox-develop.yml index 497ef4afb..efb642f66 100644 --- a/.github/workflows/lbox-develop.yml +++ b/.github/workflows/lbox-develop.yml @@ -27,7 +27,7 @@ jobs: - uses: dorny/paths-filter@v3 id: filter with: - list-files: "json" + list-files: 'json' filters: | lbox: - 'libs/lbox*/**' @@ -36,16 +36,16 @@ jobs: with: files-changed: ${{ steps.filter.outputs.lbox_files }} build: - needs: ["path-filter"] + needs: ['path-filter'] if: ${{ needs.path-filter.outputs.lbox == 'true' }} runs-on: ubuntu-latest strategy: fail-fast: false - matrix: - include: ${{ fromJSON(needs.path-filter.outputs.test-matrix) }} + matrix: + include: ${{ fromJSON(needs.path-filter.outputs.test-matrix) }} concurrency: group: lbox-staging-${{ matrix.python-version }}-${{ matrix.package }} - cancel-in-progress: false + cancel-in-progress: false steps: - uses: actions/checkout@v4 with: @@ -67,19 +67,19 @@ jobs: env: LABELBOX_TEST_API_KEY: ${{ secrets[matrix.api-key] }} DA_GCP_LABELBOX_API_KEY: ${{ secrets[matrix.da-test-key] }} - LABELBOX_TEST_ENVIRON: "staging" + LABELBOX_TEST_ENVIRON: 'staging' run: rye run integration test-pypi: runs-on: ubuntu-latest - needs: ["build", "path-filter"] + needs: ['build', 'path-filter'] if: ${{ needs.path-filter.outputs.lbox == 'true' }} strategy: fail-fast: false - matrix: + matrix: include: ${{ fromJSON(needs.path-filter.outputs.package-matrix) }} - environment: + environment: name: Test-PyPI-${{ matrix.package }} - url: "https://test.pypi.org/p/${{ matrix.package }}" + url: 'https://test.pypi.org/p/${{ matrix.package }}' permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write @@ -90,7 +90,7 @@ jobs: - uses: ./.github/actions/python-package-shared-setup with: rye-version: ${{ vars.RYE_VERSION }} - python-version: "3.8" + python-version: '3.8' - name: Create build id: create-build working-directory: libs/${{ matrix.package }} @@ -107,11 +107,11 @@ jobs: repository-url: https://test.pypi.org/legacy/ test-container: runs-on: ubuntu-latest - needs: ["build", "path-filter"] + needs: ['build', 'path-filter'] if: ${{ needs.path-filter.outputs.lbox == 'true' }} strategy: fail-fast: false - matrix: + matrix: include: ${{ fromJSON(needs.path-filter.outputs.package-matrix) }} permissions: # IMPORTANT: this permission is mandatory for trusted publishing @@ -163,4 +163,4 @@ jobs: - name: Build and push (Pull Request) Output if: github.event_name == 'pull_request' run: | - echo "ghcr.io/labelbox/${{ matrix.package }}:${{ github.sha }}" >> "$GITHUB_STEP_SUMMARY" + echo "ghcr.io/labelbox/${{ matrix.package }}:${{ github.sha }}" >> "$GITHUB_STEP_SUMMARY" \ No newline at end of file diff --git a/.github/workflows/python-package-develop.yml b/.github/workflows/python-package-develop.yml index dcec45103..769d04c74 100644 
--- a/.github/workflows/python-package-develop.yml +++ b/.github/workflows/python-package-develop.yml @@ -37,10 +37,10 @@ jobs: uses: actions/checkout@v2 with: ref: ${{ github.event.repository.default_branch }} - + - name: Fetch tags run: git fetch --tags - + - name: Get Latest SDK versions id: get_sdk_versions run: | @@ -50,9 +50,9 @@ jobs: exit 1 fi echo "sdk_versions=$sdk_versions" - echo "sdk_versions=$sdk_versions" >> $GITHUB_OUTPUT + echo "sdk_versions=$sdk_versions" >> $GITHUB_OUTPUT build: - needs: ["path-filter", "get_sdk_versions"] + needs: ['path-filter', 'get_sdk_versions'] if: ${{ needs.path-filter.outputs.labelbox == 'true' }} strategy: fail-fast: false @@ -84,15 +84,15 @@ jobs: da-test-key: ${{ matrix.da-test-key }} sdk-version: ${{ matrix.sdk-version }} fixture-profile: true - test-env: "staging" + test-env: 'staging' secrets: inherit test-pypi: runs-on: ubuntu-latest - needs: ["path-filter"] + needs: ['path-filter'] continue-on-error: true - environment: + environment: name: Test-PyPI - url: "https://test.pypi.org/p/labelbox-test" + url: 'https://test.pypi.org/p/labelbox-test' permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write @@ -103,7 +103,7 @@ jobs: - uses: ./.github/actions/python-package-shared-setup with: rye-version: ${{ vars.RYE_VERSION }} - python-version: "3.8" + python-version: '3.8' - name: Create build id: create-build working-directory: libs/labelbox @@ -119,8 +119,8 @@ jobs: packages-dir: dist/ repository-url: https://test.pypi.org/legacy/ test-container: - runs-on: ubuntu-latest - needs: ["path-filter"] + runs-on: ubuntu-latest + needs: ['path-filter'] continue-on-error: true permissions: # IMPORTANT: this permission is mandatory for trusted publishing @@ -138,14 +138,14 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - + - name: Log in to the Container registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - + - name: Build and push (Develop) if: github.event_name == 'push' uses: docker/build-push-action@v5 @@ -154,7 +154,7 @@ jobs: file: ./libs/labelbox/Dockerfile github-token: ${{ secrets.GITHUB_TOKEN }} push: true - + platforms: | linux/amd64 linux/arm64 @@ -162,7 +162,7 @@ jobs: tags: | ${{ env.CONTAINER_IMAGE }}:develop ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} - + - name: Build and push (Pull Request) if: github.event_name == 'pull_request' uses: docker/build-push-action@v5 @@ -171,10 +171,10 @@ jobs: file: ./libs/labelbox/Dockerfile github-token: ${{ secrets.GITHUB_TOKEN }} push: true - + platforms: | linux/amd64 linux/arm64 tags: | - ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} + ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} \ No newline at end of file From da7aeaa8cf27120e0e15335b3e9b786b894f9bc7 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 14:36:23 -0500 Subject: [PATCH 6/7] Removed Coco --- .../labelbox/data/serialization/__init__.py | 1 - .../data/serialization/coco/__init__.py | 1 - .../data/serialization/coco/annotation.py | 78 ----- .../data/serialization/coco/categories.py | 17 -- .../data/serialization/coco/converter.py | 170 ----------- .../labelbox/data/serialization/coco/image.py | 52 ---- .../serialization/coco/instance_dataset.py | 266 ------------------ .../serialization/coco/panoptic_dataset.py | 242 ---------------- .../labelbox/data/serialization/coco/path.py | 9 - .../data/serialization/coco/test_coco.py | 38 --- 10 files 
changed, 874 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/__init__.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/annotation.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/categories.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/converter.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/image.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/path.py delete mode 100644 libs/labelbox/tests/data/serialization/coco/test_coco.py diff --git a/libs/labelbox/src/labelbox/data/serialization/__init__.py b/libs/labelbox/src/labelbox/data/serialization/__init__.py index 71a9b3443..38cb5edff 100644 --- a/libs/labelbox/src/labelbox/data/serialization/__init__.py +++ b/libs/labelbox/src/labelbox/data/serialization/__init__.py @@ -1,2 +1 @@ from .ndjson import NDJsonConverter -from .coco import COCOConverter diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py b/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py deleted file mode 100644 index 4511e89ee..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .converter import COCOConverter diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py b/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py deleted file mode 100644 index e387cb7d9..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py +++ /dev/null @@ -1,78 +0,0 @@ -from typing import Any, Tuple, List, Union -from pathlib import Path -from collections import defaultdict -import warnings - -from ...annotation_types.relationship import RelationshipAnnotation -from ...annotation_types.metrics.confusion_matrix import ConfusionMatrixMetric -from ...annotation_types.metrics.scalar import ScalarMetric -from ...annotation_types.video import VideoMaskAnnotation -from ...annotation_types.annotation import ObjectAnnotation -from ...annotation_types.classification.classification import ( - ClassificationAnnotation, -) - -import numpy as np - -from .path import PathSerializerMixin -from pydantic import BaseModel - - -def rle_decoding(rle_arr: List[int], w: int, h: int) -> np.ndarray: - indices = [] - for idx, cnt in zip(rle_arr[0::2], rle_arr[1::2]): - indices.extend( - list(range(idx - 1, idx + cnt - 1)) - ) # RLE is 1-based index - mask = np.zeros(h * w, dtype=np.uint8) - mask[indices] = 1 - return mask.reshape((w, h)).T - - -def get_annotation_lookup(annotations): - """Get annotations from Label.annotations objects - - Args: - annotations (Label.annotations): Annotations attached to labelbox Label object used as private method - """ - annotation_lookup = defaultdict(list) - for annotation in annotations: - # Provide a default value of None if the attribute doesn't exist - attribute_value = getattr(annotation, "image_id", None) or getattr( - annotation, "name", None - ) - annotation_lookup[attribute_value].append(annotation) - return annotation_lookup - - -class SegmentInfo(BaseModel): - id: int - category_id: int - area: Union[float, int] - bbox: Tuple[float, float, float, float] # [x,y,w,h], - iscrowd: int = 0 - - -class RLE(BaseModel): - counts: List[int] - size: Tuple[int, int] # h,w or w,h? 
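# Worked example (standalone sketch, not SDK code) of how rle_decoding above
# expands (1-based start, run length) pairs -- it also settles the "h,w or
# w,h?" question: reshape((w, h)).T returns an (h, w) array.
import numpy as np

rle_arr, w, h = [2, 3], 2, 3
indices = []
for start, cnt in zip(rle_arr[0::2], rle_arr[1::2]):
    indices.extend(range(start - 1, start + cnt - 1))  # -> [1, 2, 3]
mask = np.zeros(h * w, dtype=np.uint8)
mask[indices] = 1
mask = mask.reshape((w, h)).T  # shape (3, 2), i.e. (h, w)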
- - -class COCOObjectAnnotation(BaseModel): - # All segmentations for a particular class in an image... - # So each image will have one of these for each class present in the image.. - # Annotations only exist if there is data.. - id: int - image_id: int - category_id: int - segmentation: Union[RLE, List[List[float]]] # [[x1,y1,x2,y2,x3,y3...]] - area: float - bbox: Tuple[float, float, float, float] # [x,y,w,h], - iscrowd: int = 0 - - -class PanopticAnnotation(PathSerializerMixin): - # One to one relationship between image and panoptic annotation - image_id: int - file_name: Path - segments_info: List[SegmentInfo] diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/categories.py b/libs/labelbox/src/labelbox/data/serialization/coco/categories.py deleted file mode 100644 index 60ba30fce..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/categories.py +++ /dev/null @@ -1,17 +0,0 @@ -import sys -from hashlib import md5 - -from pydantic import BaseModel - - -class Categories(BaseModel): - id: int - name: str - supercategory: str - isthing: int = 1 - - -def hash_category_name(name: str) -> int: - return int.from_bytes( - md5(name.encode("utf-8")).hexdigest().encode("utf-8"), "little" - ) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/converter.py b/libs/labelbox/src/labelbox/data/serialization/coco/converter.py deleted file mode 100644 index e270b7573..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/converter.py +++ /dev/null @@ -1,170 +0,0 @@ -from typing import Dict, Any, Union -from pathlib import Path -import os -import warnings - -from ...annotation_types.collection import LabelCollection, LabelGenerator -from ...serialization.coco.instance_dataset import CocoInstanceDataset -from ...serialization.coco.panoptic_dataset import CocoPanopticDataset - - -def create_path_if_not_exists( - path: Union[Path, str], ignore_existing_data=False -): - path = Path(path) - if not path.exists(): - path.mkdir(parents=True, exist_ok=True) - elif not ignore_existing_data and os.listdir(path): - raise ValueError( - f"Directory `{path}`` must be empty. Or set `ignore_existing_data=True`" - ) - return path - - -def validate_path(path: Union[Path, str], name: str): - path = Path(path) - if not path.exists(): - raise ValueError(f"{name} `{path}` must exist") - return path - - -class COCOConverter: - """ - Class for converting between coco and labelbox formats - Note that this class is only compatible with image data. - - Subclasses are currently ignored. - To use subclasses, manually flatten them before using the converter. - """ - - @staticmethod - def serialize_instances( - labels: LabelCollection, - image_root: Union[Path, str], - ignore_existing_data=False, - max_workers=8, - ) -> Dict[str, Any]: - """ - Convert a Labelbox LabelCollection into an mscoco dataset. - This function will only convert masks, polygons, and rectangles. - Masks will be converted into individual instances. - Use deserialize_panoptic to prevent masks from being split apart. - - Args: - labels: A collection of labels to convert - image_root: Where to save images to - ignore_existing_data: Whether or not to raise an exception if images already exist. - This exists only to support detectons panoptic fpn model which requires two mscoco payloads for the same images. - max_workers : Number of workers to process dataset with. A value of 0 will process all data in the main process - Returns: - A dictionary containing labels in the coco object format. 
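# Hypothetical call per the docstring above (`my_labels` is a placeholder
# LabelCollection of image labels):
coco = COCOConverter.serialize_instances(
    labels=my_labels, image_root="/tmp/coco/images"
)
# The result is a plain dict with CocoInstanceDataset's top-level keys:
# "info", "images", "annotations", "categories".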
- """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = create_path_if_not_exists(image_root, ignore_existing_data) - return CocoInstanceDataset.from_common( - labels=labels, image_root=image_root, max_workers=max_workers - ).model_dump() - - @staticmethod - def serialize_panoptic( - labels: LabelCollection, - image_root: Union[Path, str], - mask_root: Union[Path, str], - all_stuff: bool = False, - ignore_existing_data=False, - max_workers: int = 8, - ) -> Dict[str, Any]: - """ - Convert a Labelbox LabelCollection into an mscoco dataset. - This function will only convert masks, polygons, and rectangles. - Masks will be converted into individual instances. - Use deserialize_panoptic to prevent masks from being split apart. - - Args: - labels: A collection of labels to convert - image_root: Where to save images to - mask_root: Where to save segmentation masks to - all_stuff: If rectangle or polygon annotations are encountered, they will be treated as instances. - To convert them to stuff class set `all_stuff=True`. - ignore_existing_data: Whether or not to raise an exception if images already exist. - This exists only to support detectons panoptic fpn model which requires two mscoco payloads for the same images. - max_workers : Number of workers to process dataset with. A value of 0 will process all data in the main process. - Returns: - A dictionary containing labels in the coco panoptic format. - """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = create_path_if_not_exists(image_root, ignore_existing_data) - mask_root = create_path_if_not_exists(mask_root, ignore_existing_data) - return CocoPanopticDataset.from_common( - labels=labels, - image_root=image_root, - mask_root=mask_root, - all_stuff=all_stuff, - max_workers=max_workers, - ).model_dump() - - @staticmethod - def deserialize_panoptic( - json_data: Dict[str, Any], - image_root: Union[Path, str], - mask_root: Union[Path, str], - ) -> LabelGenerator: - """ - Convert coco panoptic data into the labelbox format (as a LabelGenerator). - - Args: - json_data: panoptic data as a dict - image_root: Path to local images that are referenced by the panoptic json - mask_root: Path to local segmentation masks that are referenced by the panoptic json - Returns: - LabelGenerator - """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = validate_path(image_root, "image_root") - mask_root = validate_path(mask_root, "mask_root") - objs = CocoPanopticDataset(**json_data) - gen = objs.to_common(image_root, mask_root) - return LabelGenerator(data=gen) - - @staticmethod - def deserialize_instances( - json_data: Dict[str, Any], image_root: Path - ) -> LabelGenerator: - """ - Convert coco object data into the labelbox format (as a LabelGenerator). 
- - Args: - json_data: coco object data as a dict - image_root: Path to local images that are referenced by the coco object json - Returns: - LabelGenerator - """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = validate_path(image_root, "image_root") - objs = CocoInstanceDataset(**json_data) - gen = objs.to_common(image_root) - return LabelGenerator(data=gen) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/image.py b/libs/labelbox/src/labelbox/data/serialization/coco/image.py deleted file mode 100644 index cef173377..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/image.py +++ /dev/null @@ -1,52 +0,0 @@ -from pathlib import Path - -from typing import Optional, Tuple -from PIL import Image -import imagesize - -from .path import PathSerializerMixin -from ...annotation_types import Label - - -class CocoImage(PathSerializerMixin): - id: int - width: int - height: int - file_name: Path - license: Optional[int] = None - flickr_url: Optional[str] = None - coco_url: Optional[str] = None - - -def get_image_id(label: Label, idx: int) -> int: - if label.data.file_path is not None: - file_name = label.data.file_path.replace(".jpg", "") - if file_name.isdecimal(): - return file_name - return idx - - -def get_image(label: Label, image_root: Path, image_id: str) -> CocoImage: - path = Path(image_root, f"{image_id}.jpg") - if not path.exists(): - im = Image.fromarray(label.data.value) - im.save(path) - w, h = im.size - else: - w, h = imagesize.get(str(path)) - return CocoImage(id=image_id, width=w, height=h, file_name=Path(path.name)) - - -def id_to_rgb(id: int) -> Tuple[int, int, int]: - digits = [] - for _ in range(3): - digits.append(id % 256) - id //= 256 - return digits - - -def rgb_to_id(red: int, green: int, blue: int) -> int: - id = blue * 256 * 256 - id += green * 256 - id += red - return id diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py b/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py deleted file mode 100644 index 5241e596f..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py +++ /dev/null @@ -1,266 +0,0 @@ -# https://cocodataset.org/#format-data - -from concurrent.futures import ProcessPoolExecutor, as_completed -from typing import Any, Dict, List, Tuple, Optional -from pathlib import Path - -import numpy as np -from tqdm import tqdm - -from ...annotation_types import ( - ImageData, - MaskData, - Mask, - ObjectAnnotation, - Label, - Polygon, - Point, - Rectangle, -) -from ...annotation_types.collection import LabelCollection -from .categories import Categories, hash_category_name -from .annotation import ( - COCOObjectAnnotation, - RLE, - get_annotation_lookup, - rle_decoding, -) -from .image import CocoImage, get_image, get_image_id -from pydantic import BaseModel - - -def mask_to_coco_object_annotation( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> Optional[COCOObjectAnnotation]: - # This is going to fill any holes into the multipolygon - # If you need to support holes use the panoptic data format - shapely = annotation.value.shapely.simplify(1).buffer(0) - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - # Iterate over polygon once or multiple polygon for each item - area = shapely.area - - return COCOObjectAnnotation( - id=annot_idx, - 
image_id=image_id, - category_id=category_id, - segmentation=[ - np.array(s.exterior.coords).ravel().tolist() - for s in ([shapely] if shapely.type == "Polygon" else shapely.geoms) - ], - area=area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - iscrowd=0, - ) - - -def vector_to_coco_object_annotation( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> COCOObjectAnnotation: - shapely = annotation.value.shapely - xmin, ymin, xmax, ymax = shapely.bounds - segmentation = [] - if isinstance(annotation.value, Polygon): - for point in annotation.value.points: - segmentation.extend([point.x, point.y]) - else: - box = annotation.value - segmentation.extend( - [ - box.start.x, - box.start.y, - box.end.x, - box.start.y, - box.end.x, - box.end.y, - box.start.x, - box.end.y, - ] - ) - - return COCOObjectAnnotation( - id=annot_idx, - image_id=image_id, - category_id=category_id, - segmentation=[segmentation], - area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - iscrowd=0, - ) - - -def rle_to_common( - class_annotations: COCOObjectAnnotation, class_name: str -) -> ObjectAnnotation: - mask = rle_decoding( - class_annotations.segmentation.counts, - *class_annotations.segmentation.size[::-1], - ) - return ObjectAnnotation( - name=class_name, - value=Mask(mask=MaskData.from_2D_arr(mask), color=[1, 1, 1]), - ) - - -def segmentations_to_common( - class_annotations: COCOObjectAnnotation, class_name: str -) -> List[ObjectAnnotation]: - # Technically it is polygons. But the key in coco is called segmentations.. - annotations = [] - for points in class_annotations.segmentation: - annotations.append( - ObjectAnnotation( - name=class_name, - value=Polygon( - points=[ - Point(x=points[i], y=points[i + 1]) - for i in range(0, len(points), 2) - ] - ), - ) - ) - return annotations - - -def object_annotation_to_coco( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> Optional[COCOObjectAnnotation]: - if isinstance(annotation.value, Mask): - return mask_to_coco_object_annotation( - annotation, annot_idx, image_id, category_id - ) - elif isinstance(annotation.value, (Polygon, Rectangle)): - return vector_to_coco_object_annotation( - annotation, annot_idx, image_id, category_id - ) - else: - return None - - -def process_label( - label: Label, idx: int, image_root: str, max_annotations_per_image=10000 -) -> Tuple[np.ndarray, List[COCOObjectAnnotation], Dict[str, str]]: - annot_idx = idx * max_annotations_per_image - image_id = get_image_id(label, idx) - image = get_image(label, image_root, image_id) - coco_annotations = [] - annotation_lookup = get_annotation_lookup(label.annotations) - categories = {} - for class_name in annotation_lookup: - for annotation in annotation_lookup[class_name]: - category_id = categories.get(annotation.name) or hash_category_name( - annotation.name - ) - coco_annotation = object_annotation_to_coco( - annotation, annot_idx, image_id, category_id - ) - if coco_annotation is not None: - coco_annotations.append(coco_annotation) - if annotation.name not in categories: - categories[annotation.name] = category_id - annot_idx += 1 - - return image, coco_annotations, categories - - -class CocoInstanceDataset(BaseModel): - info: Dict[str, Any] = {} - images: List[CocoImage] - annotations: List[COCOObjectAnnotation] - categories: List[Categories] - - @classmethod - def from_common( - cls, labels: LabelCollection, image_root: Path, max_workers=8 - ): - all_coco_annotations = [] - categories = {} - images 
= [] - futures = [] - coco_categories = {} - - if max_workers: - with ProcessPoolExecutor(max_workers=max_workers) as exc: - futures = [ - exc.submit(process_label, label, idx, image_root) - for idx, label in enumerate(labels) - ] - results = [ - future.result() for future in tqdm(as_completed(futures)) - ] - else: - results = [ - process_label(label, idx, image_root) - for idx, label in enumerate(labels) - ] - - for result in results: - images.append(result[0]) - all_coco_annotations.extend(result[1]) - coco_categories.update(result[2]) - - category_mapping = { - category_id: idx + 1 - for idx, category_id in enumerate(coco_categories.values()) - } - categories = [ - Categories( - id=category_mapping[idx], - name=name, - supercategory="all", - isthing=1, - ) - for name, idx in coco_categories.items() - ] - for annot in all_coco_annotations: - annot.category_id = category_mapping[annot.category_id] - - return CocoInstanceDataset( - info={"image_root": image_root}, - images=images, - annotations=all_coco_annotations, - categories=categories, - ) - - def to_common(self, image_root): - category_lookup = { - category.id: category for category in self.categories - } - annotation_lookup = get_annotation_lookup(self.annotations) - - for image in self.images: - im_path = Path(image_root, image.file_name) - if not im_path.exists(): - raise ValueError( - f"Cannot find file {im_path}. Make sure `image_root` is set properly" - ) - - data = ImageData(file_path=str(im_path)) - annotations = [] - for class_annotations in annotation_lookup[image.id]: - if isinstance(class_annotations.segmentation, RLE): - annotations.append( - rle_to_common( - class_annotations, - category_lookup[class_annotations.category_id].name, - ) - ) - elif isinstance(class_annotations.segmentation, list): - annotations.extend( - segmentations_to_common( - class_annotations, - category_lookup[class_annotations.category_id].name, - ) - ) - yield Label(data=data, annotations=annotations) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py b/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py deleted file mode 100644 index cbb410548..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py +++ /dev/null @@ -1,242 +0,0 @@ -from concurrent.futures import ProcessPoolExecutor, as_completed -from typing import Dict, Any, List, Union -from pathlib import Path - -from tqdm import tqdm -import numpy as np -from PIL import Image - -from ...annotation_types.geometry import Polygon, Rectangle -from ...annotation_types import Label -from ...annotation_types.geometry.mask import Mask -from ...annotation_types.annotation import ObjectAnnotation -from ...annotation_types.data.raster import MaskData, ImageData -from ...annotation_types.collection import LabelCollection -from .categories import Categories, hash_category_name -from .image import CocoImage, get_image, get_image_id, id_to_rgb -from .annotation import PanopticAnnotation, SegmentInfo, get_annotation_lookup -from pydantic import BaseModel - - -def vector_to_coco_segment_info( - canvas: np.ndarray, - annotation: ObjectAnnotation, - annotation_idx: int, - image: CocoImage, - category_id: int, -): - shapely = annotation.value.shapely - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - canvas = annotation.value.draw( - height=image.height, - width=image.width, - canvas=canvas, - color=id_to_rgb(annotation_idx), - ) - - return SegmentInfo( - id=annotation_idx, - category_id=category_id, - 
area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - ), canvas - - -def mask_to_coco_segment_info( - canvas: np.ndarray, annotation, annotation_idx: int, category_id -): - color = id_to_rgb(annotation_idx) - mask = annotation.value.draw(color=color) - shapely = annotation.value.shapely - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - canvas = np.where(canvas == (0, 0, 0), mask, canvas) - return SegmentInfo( - id=annotation_idx, - category_id=category_id, - area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - ), canvas - - -def process_label( - label: Label, idx: Union[int, str], image_root, mask_root, all_stuff=False -): - """ - Masks become stuff - Polygon and rectangle become thing - """ - annotations = get_annotation_lookup(label.annotations) - image_id = get_image_id(label, idx) - image = get_image(label, image_root, image_id) - canvas = np.zeros((image.height, image.width, 3)) - - segments = [] - categories = {} - is_thing = {} - - for class_idx, class_name in enumerate(annotations): - for annotation_idx, annotation in enumerate(annotations[class_name]): - categories[annotation.name] = hash_category_name(annotation.name) - if isinstance(annotation.value, Mask): - coco_segment_info = mask_to_coco_segment_info( - canvas, - annotation, - class_idx + 1, - categories[annotation.name], - ) - - if coco_segment_info is None: - # Filter out empty masks - continue - - segment, canvas = coco_segment_info - segments.append(segment) - is_thing[annotation.name] = 0 - - elif isinstance(annotation.value, (Polygon, Rectangle)): - coco_vector_info = vector_to_coco_segment_info( - canvas, - annotation, - annotation_idx=(class_idx if all_stuff else annotation_idx) - + 1, - image=image, - category_id=categories[annotation.name], - ) - - if coco_vector_info is None: - # Filter out empty annotations - continue - - segment, canvas = coco_vector_info - segments.append(segment) - is_thing[annotation.name] = 1 - int(all_stuff) - - mask_file = str(image.file_name).replace(".jpg", ".png") - mask_file = Path(mask_root, mask_file) - Image.fromarray(canvas.astype(np.uint8)).save(mask_file) - return ( - image, - PanopticAnnotation( - image_id=image_id, - file_name=Path(mask_file.name), - segments_info=segments, - ), - categories, - is_thing, - ) - - -class CocoPanopticDataset(BaseModel): - info: Dict[str, Any] = {} - images: List[CocoImage] - annotations: List[PanopticAnnotation] - categories: List[Categories] - - @classmethod - def from_common( - cls, - labels: LabelCollection, - image_root, - mask_root, - all_stuff, - max_workers=8, - ): - all_coco_annotations = [] - coco_categories = {} - coco_things = {} - images = [] - - if max_workers: - with ProcessPoolExecutor(max_workers=max_workers) as exc: - futures = [ - exc.submit( - process_label, - label, - idx, - image_root, - mask_root, - all_stuff, - ) - for idx, label in enumerate(labels) - ] - results = [ - future.result() for future in tqdm(as_completed(futures)) - ] - else: - results = [ - process_label(label, idx, image_root, mask_root, all_stuff) - for idx, label in enumerate(labels) - ] - - for result in results: - images.append(result[0]) - all_coco_annotations.append(result[1]) - coco_categories.update(result[2]) - coco_things.update(result[3]) - - category_mapping = { - category_id: idx + 1 - for idx, category_id in enumerate(coco_categories.values()) - } - categories = [ - Categories( - id=category_mapping[idx], - name=name, - supercategory="all", - isthing=coco_things.get(name, 1), - ) - for 
name, idx in coco_categories.items() - ] - - for annot in all_coco_annotations: - for segment in annot.segments_info: - segment.category_id = category_mapping[segment.category_id] - - return CocoPanopticDataset( - info={"image_root": image_root, "mask_root": mask_root}, - images=images, - annotations=all_coco_annotations, - categories=categories, - ) - - def to_common(self, image_root: Path, mask_root: Path): - category_lookup = { - category.id: category for category in self.categories - } - annotation_lookup = { - annotation.image_id: annotation for annotation in self.annotations - } - for image in self.images: - annotations = [] - annotation = annotation_lookup[image.id] - - im_path = Path(image_root, image.file_name) - if not im_path.exists(): - raise ValueError( - f"Cannot find file {im_path}. Make sure `image_root` is set properly" - ) - if not str(annotation.file_name).endswith(".png"): - raise ValueError( - f"COCO masks must be stored as png files and their extension must be `.png`. Found {annotation.file_name}" - ) - mask = MaskData( - file_path=str(Path(mask_root, annotation.file_name)) - ) - - for segmentation in annotation.segments_info: - category = category_lookup[segmentation.category_id] - annotations.append( - ObjectAnnotation( - name=category.name, - value=Mask(mask=mask, color=id_to_rgb(segmentation.id)), - ) - ) - data = ImageData(file_path=str(im_path)) - yield Label(data=data, annotations=annotations) - del annotation_lookup[image.id] diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/path.py b/libs/labelbox/src/labelbox/data/serialization/coco/path.py deleted file mode 100644 index c3be84f31..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/path.py +++ /dev/null @@ -1,9 +0,0 @@ -from pathlib import Path -from pydantic import BaseModel, model_serializer - - -class PathSerializerMixin(BaseModel): - @model_serializer(mode="wrap") - def serialize_model(self, handler): - res = handler(self) - return {k: str(v) if isinstance(v, Path) else v for k, v in res.items()} diff --git a/libs/labelbox/tests/data/serialization/coco/test_coco.py b/libs/labelbox/tests/data/serialization/coco/test_coco.py deleted file mode 100644 index a7c733ce5..000000000 --- a/libs/labelbox/tests/data/serialization/coco/test_coco.py +++ /dev/null @@ -1,38 +0,0 @@ -import json -from pathlib import Path - -from labelbox.data.serialization.coco import COCOConverter - -COCO_ASSETS_DIR = "tests/data/assets/coco" - - -def run_instances(tmpdir): - instance_json = json.load(open(Path(COCO_ASSETS_DIR, "instances.json"))) - res = COCOConverter.deserialize_instances( - instance_json, Path(COCO_ASSETS_DIR, "images") - ) - back = COCOConverter.serialize_instances( - res, - Path(tmpdir), - ) - - -def test_rle_objects(tmpdir): - rle_json = json.load(open(Path(COCO_ASSETS_DIR, "rle.json"))) - res = COCOConverter.deserialize_instances( - rle_json, Path(COCO_ASSETS_DIR, "images") - ) - back = COCOConverter.serialize_instances(res, tmpdir) - - -def test_panoptic(tmpdir): - panoptic_json = json.load(open(Path(COCO_ASSETS_DIR, "panoptic.json"))) - image_dir, mask_dir = [ - Path(COCO_ASSETS_DIR, dir_name) for dir_name in ["images", "masks"] - ] - res = COCOConverter.deserialize_panoptic(panoptic_json, image_dir, mask_dir) - back = COCOConverter.serialize_panoptic( - res, - Path(f"/{tmpdir}/images_panoptic"), - Path(f"/{tmpdir}/masks_panoptic"), - ) From 53eff7a765f18ad531fdf96a941ec998088e3991 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 
Sep 2024 20:43:24 -0500 Subject: [PATCH 7/7] fix merge conflict --- .../test_bulk_import_request.py | 210 ------------------ 1 file changed, 210 deletions(-) delete mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py deleted file mode 100644 index b2289503e..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ /dev/null @@ -1,210 +0,0 @@ -import uuid -from labelbox import parser, Project -import pytest - -from labelbox.data.serialization import NDJsonConverter -from labelbox.exceptions import MALValidationError, UuidError -from labelbox.schema.bulk_import_request import BulkImportRequest -from labelbox.schema.enums import BulkImportRequestState - -""" -- Here we only want to check that the uploads are calling the validation -- Then with unit tests we can check the types of errors raised -""" -# TODO: remove library once bulk import requests are removed - - -@pytest.mark.order(1) -def test_create_from_url(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_file(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - with pytest.raises(MALValidationError): - module_project.upload_annotations( - name=name, annotations=url, validate=True - ) - # Schema ids shouldn't match - - -def test_create_from_objects( - module_project: Project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_create_from_local_file( - tmp_path, predictions, module_project, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - file_name = f"{name}.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - parser.dump(predictions, f) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=str(file_path), validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_get(client, module_project): - name = str(uuid.uuid4()) - url = 
"https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - bulk_import_request = BulkImportRequest.from_name( - client, project_id=module_project.uid, name=name - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_ndjson(tmp_path, module_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - -def test_validate_ndjson_uuid(tmp_path, module_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - uid = str(uuid.uuid4()) - repeat_uuid[0]["uuid"] = uid - repeat_uuid[1]["uuid"] = uid - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - -@pytest.mark.skip( - "Slow test and uses a deprecated api endpoint for annotation imports" -) -def test_wait_till_done(rectangle_inference, project): - name = str(uuid.uuid4()) - url = project.client.upload_data( - content=parser.dumps(rectangle_inference), sign=True - ) - bulk_import_request = project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert len(bulk_import_request.inputs) == 1 - bulk_import_request.wait_until_done() - assert bulk_import_request.state == BulkImportRequestState.FINISHED - - # Check that the status files are being returned as expected - assert len(bulk_import_request.errors) == 0 - assert len(bulk_import_request.inputs) == 1 - assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] - assert len(bulk_import_request.statuses) == 1 - assert bulk_import_request.statuses[0]["status"] == "SUCCESS" - assert ( - bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] - ) - - -def test_project_bulk_import_requests(module_project, predictions): - result = module_project.bulk_import_requests() - assert len(list(result)) == 0 - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - result = module_project.bulk_import_requests() - assert len(list(result)) == 3 - - -def test_delete(module_project, predictions): - name = str(uuid.uuid4()) - - bulk_import_requests = module_project.bulk_import_requests() - [ - bulk_import_request.delete() - for bulk_import_request in bulk_import_requests - ] - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions 
- ) - bulk_import_request.wait_until_done() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 1 - - bulk_import_request.delete() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 0
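
For reference, a minimal sketch of how the removed COCOConverter was typically driven, mirroring the deleted test_coco.py round trip. The asset directory comes from those tests; the output path "/tmp/coco_out" is a hypothetical stand-in, and this assumes the now-deleted API shown above:

    import json
    from pathlib import Path

    from labelbox.data.serialization.coco import COCOConverter

    ASSETS = Path("tests/data/assets/coco")  # directory used by the deleted tests

    # COCO object JSON -> LabelGenerator of Labelbox labels
    with open(ASSETS / "instances.json") as f:
        instance_json = json.load(f)
    labels = COCOConverter.deserialize_instances(
        instance_json, image_root=ASSETS / "images"
    )

    # ...and back: labels -> COCO dict. max_workers=0 keeps everything in the
    # main process; image_root must be empty unless ignore_existing_data=True.
    coco_dict = COCOConverter.serialize_instances(
        labels, image_root=Path("/tmp/coco_out"), max_workers=0
    )
    print(sorted(coco_dict.keys()))  # ['annotations', 'categories', 'images', 'info']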
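The deleted panoptic code encodes each segment id base-256 across the (R, G, B) channels of the mask PNG (id_to_rgb / rgb_to_id above). A small self-contained worked example of that encoding, with the tuple return matching the declared Tuple[int, int, int] type:

    def id_to_rgb(id: int):
        # Little-endian base-256: R is the lowest digit, B the highest.
        digits = []
        for _ in range(3):
            digits.append(id % 256)
            id //= 256
        return tuple(digits)

    def rgb_to_id(red: int, green: int, blue: int) -> int:
        return red + green * 256 + blue * 256 * 256

    # 70000 = 112 + 17*256 + 1*65536
    assert id_to_rgb(70000) == (112, 17, 1)
    assert rgb_to_id(112, 17, 1) == 70000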
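The deleted test_bulk_import_request.py exercised project.upload_annotations, which is backed by the deprecated BulkImportRequest endpoint. A hedged sketch of the equivalent flow on the import classes this series keeps (LabelImport / MALPredictionImport, both referenced elsewhere in the patch); client, module_project, and predictions are assumed to exist, as in the old test fixtures:

    import uuid

    from labelbox.schema.annotation_import import MALPredictionImport

    import_job = MALPredictionImport.create_from_objects(
        client=client,                  # an authenticated labelbox.Client (assumed)
        project_id=module_project.uid,  # fixture from the old tests (assumed)
        name=str(uuid.uuid4()),
        predictions=predictions,        # the same NDJSON dicts the old tests uploaded
    )
    import_job.wait_until_done()
    assert import_job.errors == []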