From 4286ec4e51c3a136a96f6bb36c039b04fe400527 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Sat, 14 Sep 2024 20:43:38 -0500 Subject: [PATCH 01/13] finish remaining integration tests --- .../test_bulk_import_request.py | 48 ----------- .../data/annotation_import/test_data_types.py | 83 ------------------- .../test_generic_data_types.py | 70 ---------------- .../test_mea_prediction_import.py | 70 ++++++++++++++-- .../ndjson/test_generic_data_row_data.py | 79 ++++++++++++++++++ 5 files changed, 144 insertions(+), 206 deletions(-) delete mode 100644 libs/labelbox/tests/data/annotation_import/test_data_types.py create mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py index 9abae1422..b2289503e 100644 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py @@ -1,38 +1,11 @@ -from unittest.mock import patch import uuid from labelbox import parser, Project -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) import pytest -import random -from labelbox.data.annotation_types.annotation import ObjectAnnotation -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - ClassificationAnnotation, - ClassificationAnswer, - Radio, -) -from labelbox.data.annotation_types.data.video import VideoData -from labelbox.data.annotation_types.geometry.point import Point -from labelbox.data.annotation_types.geometry.rectangle import ( - Rectangle, - RectangleUnit, -) -from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.data.text import TextData -from labelbox.data.annotation_types.ner import ( - DocumentEntity, - DocumentTextSelection, -) -from labelbox.data.annotation_types.video import VideoObjectAnnotation from labelbox.data.serialization import NDJsonConverter from labelbox.exceptions import MALValidationError, UuidError from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.annotation_import import LabelImport, MALPredictionImport -from labelbox.schema.media_type import MediaType """ - Here we only want to check that the uploads are calling the validation @@ -87,27 +60,6 @@ def test_create_from_objects( ) -def test_create_from_label_objects( - module_project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - labels = list(NDJsonConverter.deserialize(predictions)) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=labels - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - normalized_predictions = list(NDJsonConverter.serialize(labels)) - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, normalized_predictions - ) - - def test_create_from_local_file( tmp_path, predictions, module_project, annotation_import_test_helpers ): diff --git a/libs/labelbox/tests/data/annotation_import/test_data_types.py 
b/libs/labelbox/tests/data/annotation_import/test_data_types.py deleted file mode 100644 index 1e45295ef..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_data_types.py +++ /dev/null @@ -1,83 +0,0 @@ -import pytest - -from labelbox.data.annotation_types.data import ( - AudioData, - ConversationData, - DocumentData, - HTMLData, - ImageData, - TextData, -) -from labelbox.data.serialization import NDJsonConverter -from labelbox.data.annotation_types.data.video import VideoData - -import labelbox.types as lb_types -from labelbox.schema.media_type import MediaType - -# Unit test for label based on data type. -# TODO: Dicom removed it is unstable when you deserialize and serialize on label import. If we intend to keep this library this needs add generic data types tests work with this data type. -# TODO: add MediaType.LLMPromptResponseCreation(data gen) once supported and llm human preference once media type is added - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, AudioData), - (MediaType.Html, HTMLData), - (MediaType.Image, ImageData), - (MediaType.Text, TextData), - (MediaType.Video, VideoData), - (MediaType.Conversational, ConversationData), - (MediaType.Document, DocumentData), - ], -) -def test_data_row_type_by_data_row_id( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_datarow_id, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = lb_types.Label( - data=data_type_class(uid=hardcoded_datarow_id()), - annotations=label.annotations, - ) - - assert data_label.data.uid == label.data.uid - assert label.annotations == data_label.annotations - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, AudioData), - (MediaType.Html, HTMLData), - (MediaType.Image, ImageData), - (MediaType.Text, TextData), - (MediaType.Video, VideoData), - (MediaType.Conversational, ConversationData), - (MediaType.Document, DocumentData), - ], -) -def test_data_row_type_by_global_key( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_global_key, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = lb_types.Label( - data=data_type_class(global_key=hardcoded_global_key()), - annotations=label.annotations, - ) - - assert data_label.data.global_key == label.data.global_key - assert label.annotations == data_label.annotations diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py index f8f0c449a..18385c9d9 100644 --- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py +++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py @@ -28,76 +28,6 @@ def validate_iso_format(date_string: str): assert parsed_t.second is not None -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, GenericDataRowData), - (MediaType.Html, GenericDataRowData), - (MediaType.Image, GenericDataRowData), - (MediaType.Text, GenericDataRowData), - (MediaType.Video, GenericDataRowData), - (MediaType.Conversational, GenericDataRowData), - (MediaType.Document, GenericDataRowData), - (MediaType.LLMPromptResponseCreation, 
GenericDataRowData), - (MediaType.LLMPromptCreation, GenericDataRowData), - (OntologyKind.ResponseCreation, GenericDataRowData), - ], -) -def test_generic_data_row_type_by_data_row_id( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_datarow_id, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = Label( - data=data_type_class(uid=hardcoded_datarow_id()), - annotations=label.annotations, - ) - - assert data_label.data.uid == label.data.uid - assert label.annotations == data_label.annotations - - -@pytest.mark.parametrize( - "media_type, data_type_class", - [ - (MediaType.Audio, GenericDataRowData), - (MediaType.Html, GenericDataRowData), - (MediaType.Image, GenericDataRowData), - (MediaType.Text, GenericDataRowData), - (MediaType.Video, GenericDataRowData), - (MediaType.Conversational, GenericDataRowData), - (MediaType.Document, GenericDataRowData), - # (MediaType.LLMPromptResponseCreation, GenericDataRowData), - # (MediaType.LLMPromptCreation, GenericDataRowData), - (OntologyKind.ResponseCreation, GenericDataRowData), - ], -) -def test_generic_data_row_type_by_global_key( - media_type, - data_type_class, - annotations_by_media_type, - hardcoded_global_key, -): - annotations_ndjson = annotations_by_media_type[media_type] - annotations_ndjson = [annotation[0] for annotation in annotations_ndjson] - - label = list(NDJsonConverter.deserialize(annotations_ndjson))[0] - - data_label = Label( - data=data_type_class(global_key=hardcoded_global_key()), - annotations=label.annotations, - ) - - assert data_label.data.global_key == label.data.global_key - assert label.annotations == data_label.annotations - - @pytest.mark.parametrize( "configured_project, media_type", [ diff --git a/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py b/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py index fccca2a3f..5f47975ad 100644 --- a/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py +++ b/libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py @@ -1,5 +1,19 @@ import uuid from labelbox import parser +from labelbox.data.annotation_types.annotation import ObjectAnnotation +from labelbox.data.annotation_types.classification.classification import ( + ClassificationAnnotation, + ClassificationAnswer, + Radio, +) +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.annotation_types.geometry.line import Line +from labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.polygon import Polygon +from labelbox.data.annotation_types.geometry.rectangle import Rectangle +from labelbox.data.annotation_types.label import Label import pytest from labelbox import ModelRun @@ -193,14 +207,60 @@ def test_create_from_label_objects( annotation_import_test_helpers, ): name = str(uuid.uuid4()) - use_data_row_ids = [ + use_data_row_id = [ p["dataRow"]["id"] for p in object_predictions_for_annotation_import ] - model_run_with_data_rows.upsert_data_rows(use_data_row_ids) - predictions = list( - NDJsonConverter.deserialize(object_predictions_for_annotation_import) - ) + model_run_with_data_rows.upsert_data_rows(use_data_row_id) + + predictions = [] + for data_row_id in use_data_row_id: + predictions.append( + Label( + data=GenericDataRowData( + 
uid=data_row_id,
+                ),
+                annotations=[
+                    ObjectAnnotation(
+                        name="polygon",
+                        extra={
+                            "uuid": "6d10fa30-3ea0-4e6c-bbb1-63f5c29fe3e4",
+                        },
+                        value=Polygon(
+                            points=[
+                                Point(x=147.692, y=118.154),
+                                Point(x=142.769, y=104.923),
+                                Point(x=57.846, y=118.769),
+                                Point(x=28.308, y=169.846),
+                                Point(x=147.692, y=118.154),
+                            ],
+                        ),
+                    ),
+                    ObjectAnnotation(
+                        name="bbox",
+                        extra={
+                            "uuid": "15b7138f-4bbc-42c5-ae79-45d87b0a3b2a",
+                        },
+                        value=Rectangle(
+                            start=Point(x=58.0, y=48.0),
+                            end=Point(x=70.0, y=113.0),
+                        ),
+                    ),
+                    ObjectAnnotation(
+                        name="polyline",
+                        extra={
+                            "uuid": "cf4c6df9-c39c-4fbc-9541-470f6622978a",
+                        },
+                        value=Line(
+                            points=[
+                                Point(x=147.692, y=118.154),
+                                Point(x=150.692, y=160.154),
+                            ],
+                        ),
+                    ),
+                ],
+            ),
+        )
 
     annotation_import = model_run_with_data_rows.add_predictions(
         name=name, predictions=predictions
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py b/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py
new file mode 100644
index 000000000..0dc4c21c0
--- /dev/null
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_generic_data_row_data.py
@@ -0,0 +1,79 @@
+from labelbox.data.annotation_types.data.generic_data_row_data import (
+    GenericDataRowData,
+)
+from labelbox.data.serialization.ndjson.converter import NDJsonConverter
+from labelbox.types import Label, ClassificationAnnotation, Text
+
+
+def test_generic_data_row_global_key():
+    label_1 = Label(
+        data=GenericDataRowData(global_key="test"),
+        annotations=[
+            ClassificationAnnotation(
+                name="free_text",
+                value=Text(answer="sample text"),
+                extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"},
+            )
+        ],
+    )
+    label_2 = Label(
+        data={"global_key": "test"},
+        annotations=[
+            ClassificationAnnotation(
+                name="free_text",
+                value=Text(answer="sample text"),
+                extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"},
+            )
+        ],
+    )
+
+    expected_result = [
+        {
+            "answer": "sample text",
+            "dataRow": {"globalKey": "test"},
+            "name": "free_text",
+            "uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0",
+        }
+    ]
+    assert (
+        list(NDJsonConverter.serialize([label_1]))
+        == list(NDJsonConverter.serialize([label_2]))
+        == expected_result
+    )
+
+
+def test_generic_data_row_id():
+    label_1 = Label(
+        data=GenericDataRowData(uid="test"),
+        annotations=[
+            ClassificationAnnotation(
+                name="free_text",
+                value=Text(answer="sample text"),
+                extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"},
+            )
+        ],
+    )
+    label_2 = Label(
+        data={"uid": "test"},
+        annotations=[
+            ClassificationAnnotation(
+                name="free_text",
+                value=Text(answer="sample text"),
+                extra={"uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0"},
+            )
+        ],
+    )
+
+    expected_result = [
+        {
+            "answer": "sample text",
+            "dataRow": {"id": "test"},
+            "name": "free_text",
+            "uuid": "141c3592-e5f0-4866-9943-d4a21fd47eb0",
+        }
+    ]
+    assert (
+        list(NDJsonConverter.serialize([label_1]))
+        == list(NDJsonConverter.serialize([label_2]))
+        == expected_result
+    )
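With NDJsonConverter.deserialize going away in the next commit, the tests above build Label objects directly over GenericDataRowData and exercise only the serialize path. A minimal sketch of that pattern, assuming the labelbox SDK from this branch is installed; "my-data-row-id" is a placeholder data row id, not a value from the diff:

    import uuid

    from labelbox.data.annotation_types.data.generic_data_row_data import (
        GenericDataRowData,
    )
    from labelbox.data.serialization import NDJsonConverter
    from labelbox.types import ClassificationAnnotation, Label, Text

    # Construct the Label directly instead of round-tripping raw ndjson
    # through the removed NDJsonConverter.deserialize.
    label = Label(
        data=GenericDataRowData(uid="my-data-row-id"),
        annotations=[
            ClassificationAnnotation(
                name="free_text",
                value=Text(answer="sample text"),
                extra={"uuid": str(uuid.uuid4())},
            )
        ],
    )

    # Serialization is unchanged and yields ndjson-ready dicts; the data row
    # reference is keyed by "id" here, or "globalKey" when global_key is set.
    ndjson = list(NDJsonConverter.serialize([label]))
    assert ndjson[0]["dataRow"] == {"id": "my-data-row-id"}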
From ed9ba18689cd6f224305da17f68af1ca5d40e8fd Mon Sep 17 00:00:00 2001
From: Gabefire <33893811+Gabefire@users.noreply.github.com>
Date: Sat, 14 Sep 2024 20:48:07 -0500
Subject: [PATCH 02/13] Removed deserialize and subregistry logic

---
 .../data/serialization/ndjson/base.py         | 12 ----
 .../serialization/ndjson/classification.py    | 13 ++--
 .../data/serialization/ndjson/converter.py    | 14 ----
 .../data/serialization/ndjson/label.py        | 64 +------------------
 .../data/serialization/ndjson/metric.py       |  5 +-
 .../labelbox/data/serialization/ndjson/mmc.py |  4 +--
 .../data/serialization/ndjson/objects.py      | 49 +++++---------
 .../data/serialization/ndjson/relationship.py |  4 +-
 8 files changed, 28 insertions(+), 137 deletions(-)

diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py
index 75ebdc100..d8d8cd36f 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/base.py
@@ -8,18 +8,6 @@
 from ....annotated_types import Cuid
 
-subclass_registry = {}
-
-
-class _SubclassRegistryBase(BaseModel):
-    model_config = ConfigDict(extra="allow")
-
-    def __init_subclass__(cls, **kwargs):
-        super().__init_subclass__(**kwargs)
-        if cls.__name__ != "NDAnnotation":
-            with threading.Lock():
-                subclass_registry[cls.__name__] = cls
-
 
 class DataRow(_CamelCaseMixin):
     id: Optional[str] = None
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py
index b127c4a90..2c3215265 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py
@@ -30,7 +30,6 @@
     model_serializer,
 )
 from pydantic.alias_generators import to_camel
-from .base import _SubclassRegistryBase
 
 
 class NDAnswer(ConfidenceMixin, CustomMetricsMixin):
@@ -224,7 +223,7 @@ def from_common(
 # ====== End of subclasses
 
 
-class NDText(NDAnnotation, NDTextSubclass, _SubclassRegistryBase):
+class NDText(NDAnnotation, NDTextSubclass):
     @classmethod
     def from_common(
         cls,
@@ -249,9 +248,7 @@ def from_common(
     )
 
 
-class NDChecklist(
-    NDAnnotation, NDChecklistSubclass, VideoSupported, _SubclassRegistryBase
-):
+class NDChecklist(NDAnnotation, NDChecklistSubclass, VideoSupported):
     @model_serializer(mode="wrap")
     def serialize_model(self, handler):
         res = handler(self)
@@ -298,9 +295,7 @@ def from_common(
     )
 
 
-class NDRadio(
-    NDAnnotation, NDRadioSubclass, VideoSupported, _SubclassRegistryBase
-):
+class NDRadio(NDAnnotation, NDRadioSubclass, VideoSupported):
     @classmethod
     def from_common(
         cls,
@@ -343,7 +338,7 @@ def serialize_model(self, handler):
         return res
 
 
-class NDPromptText(NDAnnotation, NDPromptTextSubclass, _SubclassRegistryBase):
+class NDPromptText(NDAnnotation, NDPromptTextSubclass):
     @classmethod
     def from_common(
         cls,
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py
index 01ab8454a..8176d7862 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py
@@ -26,20 +26,6 @@
 
 
 class NDJsonConverter:
-    @staticmethod
-    def deserialize(json_data: Iterable[Dict[str, Any]]) -> LabelGenerator:
-        """
-        Converts ndjson data (prediction import format) into the common labelbox format.
-
-        Args:
-            json_data: An iterable representing the ndjson data
-        Returns:
-            LabelGenerator containing the ndjson data.
- """ - data = NDLabel(**{"annotations": copy.copy(json_data)}) - res = data.to_common() - return res - @staticmethod def serialize( labels: LabelCollection, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 18134a228..7039ae834 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -46,7 +46,6 @@ from .relationship import NDRelationship from .base import DataRow from pydantic import BaseModel, ValidationError -from .base import subclass_registry, _SubclassRegistryBase from pydantic_core import PydanticUndefined from contextlib import suppress @@ -67,68 +66,7 @@ class NDLabel(BaseModel): - annotations: List[_SubclassRegistryBase] - - def __init__(self, **kwargs): - # NOTE: Deserialization of subclasses in pydantic is difficult, see here https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83 - # Below implements the subclass registry as mentioned in the article. The python dicts we pass in can be missing certain fields - # we essentially have to infer the type against all sub classes that have the _SubclasssRegistryBase inheritance. - # It works by checking if the keys of our annotations we are missing in matches any required subclass. - # More keys are prioritized over less keys (closer match). This is used when importing json to our base models not a lot of customer workflows - # depend on this method but this works for all our existing tests with the bonus of added validation. (no subclass found it throws an error) - - for index, annotation in enumerate(kwargs["annotations"]): - if isinstance(annotation, dict): - item_annotation_keys = annotation.keys() - key_subclass_combos = defaultdict(list) - for subclass in subclass_registry.values(): - # Get all required keys from subclass - annotation_keys = [] - for k, field in subclass.model_fields.items(): - if field.default == PydanticUndefined and k != "uuid": - if ( - hasattr(field, "alias") - and field.alias in item_annotation_keys - ): - annotation_keys.append(field.alias) - elif ( - hasattr(field, "validation_alias") - and field.validation_alias - in item_annotation_keys - ): - annotation_keys.append(field.validation_alias) - else: - annotation_keys.append(k) - - key_subclass_combos[subclass].extend(annotation_keys) - - # Sort by subclass that has the most keys i.e. 
the one with the most keys that matches is most likely our subclass - key_subclass_combos = dict( - sorted( - key_subclass_combos.items(), - key=lambda x: len(x[1]), - reverse=True, - ) - ) - - for subclass, key_subclass_combo in key_subclass_combos.items(): - # Choose the keys from our dict we supplied that matches the required keys of a subclass - check_required_keys = all( - key in list(item_annotation_keys) - for key in key_subclass_combo - ) - if check_required_keys: - # Keep trying subclasses until we find one that has valid values (does not throw an validation error) - with suppress(ValidationError): - annotation = subclass(**annotation) - break - if isinstance(annotation, dict): - raise ValueError( - f"Could not find subclass for fields: {item_annotation_keys}" - ) - - kwargs["annotations"][index] = annotation - super().__init__(**kwargs) + annotations: AnnotationType class _Relationship(BaseModel): """This object holds information about the relationship""" diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py index 60d538b19..b28e575cf 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py @@ -15,7 +15,6 @@ ConfusionMatrixMetricConfidenceValue, ) from pydantic import ConfigDict, model_serializer -from .base import _SubclassRegistryBase class BaseNDMetric(NDJsonBase): @@ -33,7 +32,7 @@ def serialize_model(self, handler): return res -class NDConfusionMatrixMetric(BaseNDMetric, _SubclassRegistryBase): +class NDConfusionMatrixMetric(BaseNDMetric): metric_value: Union[ ConfusionMatrixMetricValue, ConfusionMatrixMetricConfidenceValue ] @@ -65,7 +64,7 @@ def from_common( ) -class NDScalarMetric(BaseNDMetric, _SubclassRegistryBase): +class NDScalarMetric(BaseNDMetric): metric_value: Union[ScalarMetricValue, ScalarMetricConfidenceValue] metric_name: Optional[str] = None aggregation: Optional[ScalarMetricAggregation] = ( diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py index 4be24f683..74d185f45 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py @@ -2,7 +2,7 @@ from labelbox.utils import _CamelCaseMixin -from .base import _SubclassRegistryBase, DataRow, NDAnnotation +from .base import DataRow, NDAnnotation from ...annotation_types.mmc import ( MessageSingleSelectionTask, MessageMultiSelectionTask, @@ -20,7 +20,7 @@ class MessageTaskData(_CamelCaseMixin): ] -class NDMessageTask(NDAnnotation, _SubclassRegistryBase): +class NDMessageTask(NDAnnotation): message_evaluation_task: MessageTaskData def to_common(self) -> MessageEvaluationTaskAnnotation: diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py index a1465fa06..91abface6 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py @@ -52,7 +52,7 @@ NDSubclassification, NDSubclassificationType, ) -from .base import DataRow, NDAnnotation, NDJsonBase, _SubclassRegistryBase +from .base import DataRow, NDAnnotation, NDJsonBase from pydantic import BaseModel @@ -81,9 +81,7 @@ class Bbox(BaseModel): width: float -class NDPoint( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class 
NDPoint(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): point: _Point def to_common(self) -> Point: @@ -114,7 +112,7 @@ def from_common( ) -class NDFramePoint(VideoSupported, _SubclassRegistryBase): +class NDFramePoint(VideoSupported): point: _Point classifications: List[NDSubclassificationType] = [] @@ -148,9 +146,7 @@ def from_common( ) -class NDLine( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDLine(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): line: List[_Point] def to_common(self) -> Line: @@ -181,7 +177,7 @@ def from_common( ) -class NDFrameLine(VideoSupported, _SubclassRegistryBase): +class NDFrameLine(VideoSupported): line: List[_Point] classifications: List[NDSubclassificationType] = [] @@ -215,7 +211,7 @@ def from_common( ) -class NDDicomLine(NDFrameLine, _SubclassRegistryBase): +class NDDicomLine(NDFrameLine): def to_common( self, name: str, @@ -234,9 +230,7 @@ def to_common( ) -class NDPolygon( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDPolygon(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): polygon: List[_Point] def to_common(self) -> Polygon: @@ -267,9 +261,7 @@ def from_common( ) -class NDRectangle( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDRectangle(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): bbox: Bbox def to_common(self) -> Rectangle: @@ -313,7 +305,7 @@ def from_common( ) -class NDDocumentRectangle(NDRectangle, _SubclassRegistryBase): +class NDDocumentRectangle(NDRectangle): page: int unit: str @@ -360,7 +352,7 @@ def from_common( ) -class NDFrameRectangle(VideoSupported, _SubclassRegistryBase): +class NDFrameRectangle(VideoSupported): bbox: Bbox classifications: List[NDSubclassificationType] = [] @@ -496,7 +488,7 @@ def to_common( ] -class NDSegments(NDBaseObject, _SubclassRegistryBase): +class NDSegments(NDBaseObject): segments: List[NDSegment] def to_common(self, name: str, feature_schema_id: Cuid): @@ -532,7 +524,7 @@ def from_common( ) -class NDDicomSegments(NDBaseObject, DicomSupported, _SubclassRegistryBase): +class NDDicomSegments(NDBaseObject, DicomSupported): segments: List[NDDicomSegment] def to_common(self, name: str, feature_schema_id: Cuid): @@ -580,9 +572,7 @@ class _PNGMask(BaseModel): png: str -class NDMask( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDMask(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): mask: Union[_URIMask, _PNGMask] def to_common(self) -> Mask: @@ -646,7 +636,6 @@ class NDVideoMasks( NDJsonBase, ConfidenceMixin, CustomMetricsNotSupportedMixin, - _SubclassRegistryBase, ): masks: NDVideoMasksFramesInstances @@ -678,7 +667,7 @@ def from_common(cls, annotation, data): ) -class NDDicomMasks(NDVideoMasks, DicomSupported, _SubclassRegistryBase): +class NDDicomMasks(NDVideoMasks, DicomSupported): def to_common(self) -> DICOMMaskAnnotation: return DICOMMaskAnnotation( frames=self.masks.frames, @@ -702,9 +691,7 @@ class Location(BaseModel): end: int -class NDTextEntity( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDTextEntity(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): location: Location def to_common(self) -> TextEntity: @@ -738,9 +725,7 @@ def from_common( ) -class NDDocumentEntity( - NDBaseObject, ConfidenceMixin, CustomMetricsMixin, _SubclassRegistryBase -): +class NDDocumentEntity(NDBaseObject, ConfidenceMixin, CustomMetricsMixin): name: str text_selections: List[DocumentTextSelection] @@ 
-774,7 +759,7 @@ from_common
     )
 
 
-class NDConversationEntity(NDTextEntity, _SubclassRegistryBase):
+class NDConversationEntity(NDTextEntity):
     message_id: str
 
     def to_common(self) -> ConversationEntity:
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py
index fbea7e477..94c8e9879 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py
@@ -5,7 +5,7 @@
 from ...annotation_types.relationship import RelationshipAnnotation
 from ...annotation_types.relationship import Relationship
 from .objects import NDObjectType
-from .base import DataRow, _SubclassRegistryBase
+from .base import DataRow
 
 SUPPORTED_ANNOTATIONS = NDObjectType
@@ -16,7 +16,7 @@ class _Relationship(BaseModel):
     type: str
 
 
-class NDRelationship(NDAnnotation, _SubclassRegistryBase):
+class NDRelationship(NDAnnotation):
     relationship: _Relationship
 
     @staticmethod

From fd5c3917d179696aafba7c296a99508de24163bc Mon Sep 17 00:00:00 2001
From: Gabefire <33893811+Gabefire@users.noreply.github.com>
Date: Mon, 16 Sep 2024 13:15:30 -0500
Subject: [PATCH 03/13] temp added v6 to workflows so tests kick off

---
 .github/workflows/lbox-develop.yml           | 32 ++++++++---------
 .github/workflows/python-package-develop.yml | 38 ++++++++++----------
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/.github/workflows/lbox-develop.yml b/.github/workflows/lbox-develop.yml
index ba1e4f34e..497ef4afb 100644
--- a/.github/workflows/lbox-develop.yml
+++ b/.github/workflows/lbox-develop.yml
@@ -2,9 +2,9 @@ name: LBox Develop
 on:
   push:
-    branches: [develop]
+    branches: [develop, v6]
   pull_request:
-    branches: [develop]
+    branches: [develop, v6]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -27,7 +27,7 @@ jobs:
       - uses: dorny/paths-filter@v3
         id: filter
         with:
-          list-files: 'json'
+          list-files: "json"
           filters: |
             lbox:
               - 'libs/lbox*/**'
@@ -36,16 +36,16 @@ jobs:
     with:
       files-changed: ${{ steps.filter.outputs.lbox_files }}
   build:
-    needs: ['path-filter']
+    needs: ["path-filter"]
     if: ${{ needs.path-filter.outputs.lbox == 'true' }}
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      matrix: 
-        include: ${{ fromJSON(needs.path-filter.outputs.test-matrix) }}
+      matrix:
+        include: ${{ fromJSON(needs.path-filter.outputs.test-matrix) }}
     concurrency:
       group: lbox-staging-${{ matrix.python-version }}-${{ matrix.package }}
-      cancel-in-progress: false 
+      cancel-in-progress: false
     steps:
       - uses: actions/checkout@v4
         with:
@@ -67,19 +67,19 @@ jobs:
         env:
           LABELBOX_TEST_API_KEY: ${{ secrets[matrix.api-key] }}
           DA_GCP_LABELBOX_API_KEY: ${{ secrets[matrix.da-test-key] }}
-          LABELBOX_TEST_ENVIRON: 'staging'
+          LABELBOX_TEST_ENVIRON: "staging"
         run: rye run integration
   test-pypi:
     runs-on: ubuntu-latest
-    needs: ['build', 'path-filter']
+    needs: ["build", "path-filter"]
     if: ${{ needs.path-filter.outputs.lbox == 'true' }}
     strategy:
       fail-fast: false
-      matrix: 
+      matrix:
         include: ${{ fromJSON(needs.path-filter.outputs.package-matrix) }}
-    environment: 
+    environment:
       name: Test-PyPI-${{ matrix.package }}
-      url: 'https://test.pypi.org/p/${{ matrix.package }}'
+      url: "https://test.pypi.org/p/${{ matrix.package }}"
     permissions:
       # IMPORTANT: this permission is mandatory for trusted publishing
       id-token: write
@@ -90,7 +90,7 @@ jobs:
       - uses: ./.github/actions/python-package-shared-setup
         with:
           rye-version: ${{ vars.RYE_VERSION }}
-          python-version: '3.8'
+          python-version: "3.8"
      - name: 
Create build id: create-build working-directory: libs/${{ matrix.package }} @@ -107,11 +107,11 @@ jobs: repository-url: https://test.pypi.org/legacy/ test-container: runs-on: ubuntu-latest - needs: ['build', 'path-filter'] + needs: ["build", "path-filter"] if: ${{ needs.path-filter.outputs.lbox == 'true' }} strategy: fail-fast: false - matrix: + matrix: include: ${{ fromJSON(needs.path-filter.outputs.package-matrix) }} permissions: # IMPORTANT: this permission is mandatory for trusted publishing @@ -163,4 +163,4 @@ jobs: - name: Build and push (Pull Request) Output if: github.event_name == 'pull_request' run: | - echo "ghcr.io/labelbox/${{ matrix.package }}:${{ github.sha }}" >> "$GITHUB_STEP_SUMMARY" \ No newline at end of file + echo "ghcr.io/labelbox/${{ matrix.package }}:${{ github.sha }}" >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/workflows/python-package-develop.yml b/.github/workflows/python-package-develop.yml index 05eff5dc4..dcec45103 100644 --- a/.github/workflows/python-package-develop.yml +++ b/.github/workflows/python-package-develop.yml @@ -2,9 +2,9 @@ name: Labelbox Python SDK Staging (Develop) on: push: - branches: [develop] + branches: [develop, v6] pull_request: - branches: [develop] + branches: [develop, v6] concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -37,10 +37,10 @@ jobs: uses: actions/checkout@v2 with: ref: ${{ github.event.repository.default_branch }} - + - name: Fetch tags run: git fetch --tags - + - name: Get Latest SDK versions id: get_sdk_versions run: | @@ -50,9 +50,9 @@ jobs: exit 1 fi echo "sdk_versions=$sdk_versions" - echo "sdk_versions=$sdk_versions" >> $GITHUB_OUTPUT + echo "sdk_versions=$sdk_versions" >> $GITHUB_OUTPUT build: - needs: ['path-filter', 'get_sdk_versions'] + needs: ["path-filter", "get_sdk_versions"] if: ${{ needs.path-filter.outputs.labelbox == 'true' }} strategy: fail-fast: false @@ -84,15 +84,15 @@ jobs: da-test-key: ${{ matrix.da-test-key }} sdk-version: ${{ matrix.sdk-version }} fixture-profile: true - test-env: 'staging' + test-env: "staging" secrets: inherit test-pypi: runs-on: ubuntu-latest - needs: ['path-filter'] + needs: ["path-filter"] continue-on-error: true - environment: + environment: name: Test-PyPI - url: 'https://test.pypi.org/p/labelbox-test' + url: "https://test.pypi.org/p/labelbox-test" permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write @@ -103,7 +103,7 @@ jobs: - uses: ./.github/actions/python-package-shared-setup with: rye-version: ${{ vars.RYE_VERSION }} - python-version: '3.8' + python-version: "3.8" - name: Create build id: create-build working-directory: libs/labelbox @@ -119,8 +119,8 @@ jobs: packages-dir: dist/ repository-url: https://test.pypi.org/legacy/ test-container: - runs-on: ubuntu-latest - needs: ['path-filter'] + runs-on: ubuntu-latest + needs: ["path-filter"] continue-on-error: true permissions: # IMPORTANT: this permission is mandatory for trusted publishing @@ -138,14 +138,14 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - + - name: Log in to the Container registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - + - name: Build and push (Develop) if: github.event_name == 'push' uses: docker/build-push-action@v5 @@ -154,7 +154,7 @@ jobs: file: ./libs/labelbox/Dockerfile github-token: ${{ secrets.GITHUB_TOKEN }} push: true - + platforms: | linux/amd64 linux/arm64 @@ -162,7 +162,7 @@ jobs: tags: | ${{ env.CONTAINER_IMAGE }}:develop 
${{ env.CONTAINER_IMAGE }}:${{ github.sha }} - + - name: Build and push (Pull Request) if: github.event_name == 'pull_request' uses: docker/build-push-action@v5 @@ -171,10 +171,10 @@ jobs: file: ./libs/labelbox/Dockerfile github-token: ${{ secrets.GITHUB_TOKEN }} push: true - + platforms: | linux/amd64 linux/arm64 tags: | - ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} \ No newline at end of file + ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} From 5f4f73356a991da613be6f4e98ef321f275199b8 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:19:09 -0500 Subject: [PATCH 04/13] fixed linting tests --- .../schema/labeling_service_dashboard.py | 38 ++++--- .../unit/test_labeling_service_dashboard.py | 102 +++++++++--------- 2 files changed, 76 insertions(+), 64 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py index 2052897f6..c5e1fa11e 100644 --- a/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py +++ b/libs/labelbox/src/labelbox/schema/labeling_service_dashboard.py @@ -84,7 +84,8 @@ def __init__(self, **kwargs): super().__init__(**kwargs) if not self.client.enable_experimental: raise RuntimeError( - "Please enable experimental in client to use LabelingService") + "Please enable experimental in client to use LabelingService" + ) @property def service_type(self): @@ -97,20 +98,28 @@ def service_type(self): if self.editor_task_type is None: return sentence_case(self.media_type.value) - if (self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation - and self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.OfflineModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Offline chat evaluation" - if (self.editor_task_type == EditorTaskType.ModelChatEvaluation and - self.media_type == MediaType.Conversational): + if ( + self.editor_task_type == EditorTaskType.ModelChatEvaluation + and self.media_type == MediaType.Conversational + ): return "Live chat evaluation" - if (self.editor_task_type == EditorTaskType.ResponseCreation and - self.media_type == MediaType.Text): + if ( + self.editor_task_type == EditorTaskType.ResponseCreation + and self.media_type == MediaType.Text + ): return "Response creation" - if (self.media_type == MediaType.LLMPromptCreation or - self.media_type == MediaType.LLMPromptResponseCreation): + if ( + self.media_type == MediaType.LLMPromptCreation + or self.media_type == MediaType.LLMPromptResponseCreation + ): return "Prompt response creation" return sentence_case(self.media_type.value) @@ -154,7 +163,8 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) else: template = Template( """query SearchProjectsPyApi($$first: Int, $$from: String) { @@ -164,11 +174,13 @@ def get_all( pageInfo { endCursor } } } - """) + """ + ) query_str = template.substitute( labeling_dashboard_selections=GRAPHQL_QUERY_SELECTIONS, search_query=build_search_filter(search_query) - if search_query else None, + if search_query + else None, ) params: Dict[str, Union[str, int]] = {} @@ -186,7 +198,7 @@ def convert_to_labeling_service_dashboard(client, data): experimental=True, ) - @model_validator(mode='before') + @model_validator(mode="before") def convert_boost_data(cls, data): if "boostStatus" in data: data["status"] = LabelingServiceStatus(data.pop("boostStatus")) diff --git a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py 
b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py index 8ecdef2f1..061efbadf 100644 --- a/libs/labelbox/tests/unit/test_labeling_service_dashboard.py +++ b/libs/labelbox/tests/unit/test_labeling_service_dashboard.py @@ -5,23 +5,23 @@ def test_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count is None @@ -29,23 +29,23 @@ def test_no_tasks_remaining_count(): def test_tasks_remaining_count_exists(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 0, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 0, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 1, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 0, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 0, + "tasksCompletedCount": 0, + "tasksRemainingCount": 1, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 1 @@ -53,23 +53,23 @@ def test_tasks_remaining_count_exists(): def test_tasks_total_no_tasks_remaining_count(): labeling_service_dashboard_data = { - 'id': 'cm0eeo4c301lg07061phfhva0', - 'name': 'TestStatus', - 'boostRequestedAt': '2024-08-28T22:08:07.446Z', - 'boostUpdatedAt': '2024-08-28T22:08:07.446Z', - 'boostRequestedBy': None, - 'boostStatus': 'SET_UP', - 'dataRowsCount': 0, - 'dataRowsDoneCount': 0, - 'dataRowsInReviewCount': 1, - 'dataRowsInReworkCount': 0, - 'tasksTotalCount': 1, - 'tasksCompletedCount': 0, - 'tasksRemainingCount': 0, - 'mediaType': 'image', - 'editorTaskType': None, - 'tags': [], - 'client': MagicMock() + "id": "cm0eeo4c301lg07061phfhva0", + "name": "TestStatus", + "boostRequestedAt": "2024-08-28T22:08:07.446Z", + "boostUpdatedAt": "2024-08-28T22:08:07.446Z", + "boostRequestedBy": None, + "boostStatus": "SET_UP", + "dataRowsCount": 0, + "dataRowsDoneCount": 0, + "dataRowsInReviewCount": 
1, + "dataRowsInReworkCount": 0, + "tasksTotalCount": 1, + "tasksCompletedCount": 0, + "tasksRemainingCount": 0, + "mediaType": "image", + "editorTaskType": None, + "tags": [], + "client": MagicMock(), } lsd = LabelingServiceDashboard(**labeling_service_dashboard_data) assert lsd.tasks_remaining_count == 0 From 54909b4184778d323d348530ce3cbe41418ac4a0 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:31:28 -0500 Subject: [PATCH 05/13] removed formatting --- .github/workflows/lbox-develop.yml | 28 ++++++++-------- .github/workflows/python-package-develop.yml | 34 ++++++++++---------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/lbox-develop.yml b/.github/workflows/lbox-develop.yml index 497ef4afb..efb642f66 100644 --- a/.github/workflows/lbox-develop.yml +++ b/.github/workflows/lbox-develop.yml @@ -27,7 +27,7 @@ jobs: - uses: dorny/paths-filter@v3 id: filter with: - list-files: "json" + list-files: 'json' filters: | lbox: - 'libs/lbox*/**' @@ -36,16 +36,16 @@ jobs: with: files-changed: ${{ steps.filter.outputs.lbox_files }} build: - needs: ["path-filter"] + needs: ['path-filter'] if: ${{ needs.path-filter.outputs.lbox == 'true' }} runs-on: ubuntu-latest strategy: fail-fast: false - matrix: - include: ${{ fromJSON(needs.path-filter.outputs.test-matrix) }} + matrix: + include: ${{ fromJSON(needs.path-filter.outputs.test-matrix) }} concurrency: group: lbox-staging-${{ matrix.python-version }}-${{ matrix.package }} - cancel-in-progress: false + cancel-in-progress: false steps: - uses: actions/checkout@v4 with: @@ -67,19 +67,19 @@ jobs: env: LABELBOX_TEST_API_KEY: ${{ secrets[matrix.api-key] }} DA_GCP_LABELBOX_API_KEY: ${{ secrets[matrix.da-test-key] }} - LABELBOX_TEST_ENVIRON: "staging" + LABELBOX_TEST_ENVIRON: 'staging' run: rye run integration test-pypi: runs-on: ubuntu-latest - needs: ["build", "path-filter"] + needs: ['build', 'path-filter'] if: ${{ needs.path-filter.outputs.lbox == 'true' }} strategy: fail-fast: false - matrix: + matrix: include: ${{ fromJSON(needs.path-filter.outputs.package-matrix) }} - environment: + environment: name: Test-PyPI-${{ matrix.package }} - url: "https://test.pypi.org/p/${{ matrix.package }}" + url: 'https://test.pypi.org/p/${{ matrix.package }}' permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write @@ -90,7 +90,7 @@ jobs: - uses: ./.github/actions/python-package-shared-setup with: rye-version: ${{ vars.RYE_VERSION }} - python-version: "3.8" + python-version: '3.8' - name: Create build id: create-build working-directory: libs/${{ matrix.package }} @@ -107,11 +107,11 @@ jobs: repository-url: https://test.pypi.org/legacy/ test-container: runs-on: ubuntu-latest - needs: ["build", "path-filter"] + needs: ['build', 'path-filter'] if: ${{ needs.path-filter.outputs.lbox == 'true' }} strategy: fail-fast: false - matrix: + matrix: include: ${{ fromJSON(needs.path-filter.outputs.package-matrix) }} permissions: # IMPORTANT: this permission is mandatory for trusted publishing @@ -163,4 +163,4 @@ jobs: - name: Build and push (Pull Request) Output if: github.event_name == 'pull_request' run: | - echo "ghcr.io/labelbox/${{ matrix.package }}:${{ github.sha }}" >> "$GITHUB_STEP_SUMMARY" + echo "ghcr.io/labelbox/${{ matrix.package }}:${{ github.sha }}" >> "$GITHUB_STEP_SUMMARY" \ No newline at end of file diff --git a/.github/workflows/python-package-develop.yml b/.github/workflows/python-package-develop.yml index dcec45103..769d04c74 
100644 --- a/.github/workflows/python-package-develop.yml +++ b/.github/workflows/python-package-develop.yml @@ -37,10 +37,10 @@ jobs: uses: actions/checkout@v2 with: ref: ${{ github.event.repository.default_branch }} - + - name: Fetch tags run: git fetch --tags - + - name: Get Latest SDK versions id: get_sdk_versions run: | @@ -50,9 +50,9 @@ jobs: exit 1 fi echo "sdk_versions=$sdk_versions" - echo "sdk_versions=$sdk_versions" >> $GITHUB_OUTPUT + echo "sdk_versions=$sdk_versions" >> $GITHUB_OUTPUT build: - needs: ["path-filter", "get_sdk_versions"] + needs: ['path-filter', 'get_sdk_versions'] if: ${{ needs.path-filter.outputs.labelbox == 'true' }} strategy: fail-fast: false @@ -84,15 +84,15 @@ jobs: da-test-key: ${{ matrix.da-test-key }} sdk-version: ${{ matrix.sdk-version }} fixture-profile: true - test-env: "staging" + test-env: 'staging' secrets: inherit test-pypi: runs-on: ubuntu-latest - needs: ["path-filter"] + needs: ['path-filter'] continue-on-error: true - environment: + environment: name: Test-PyPI - url: "https://test.pypi.org/p/labelbox-test" + url: 'https://test.pypi.org/p/labelbox-test' permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write @@ -103,7 +103,7 @@ jobs: - uses: ./.github/actions/python-package-shared-setup with: rye-version: ${{ vars.RYE_VERSION }} - python-version: "3.8" + python-version: '3.8' - name: Create build id: create-build working-directory: libs/labelbox @@ -119,8 +119,8 @@ jobs: packages-dir: dist/ repository-url: https://test.pypi.org/legacy/ test-container: - runs-on: ubuntu-latest - needs: ["path-filter"] + runs-on: ubuntu-latest + needs: ['path-filter'] continue-on-error: true permissions: # IMPORTANT: this permission is mandatory for trusted publishing @@ -138,14 +138,14 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - + - name: Log in to the Container registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - + - name: Build and push (Develop) if: github.event_name == 'push' uses: docker/build-push-action@v5 @@ -154,7 +154,7 @@ jobs: file: ./libs/labelbox/Dockerfile github-token: ${{ secrets.GITHUB_TOKEN }} push: true - + platforms: | linux/amd64 linux/arm64 @@ -162,7 +162,7 @@ jobs: tags: | ${{ env.CONTAINER_IMAGE }}:develop ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} - + - name: Build and push (Pull Request) if: github.event_name == 'pull_request' uses: docker/build-push-action@v5 @@ -171,10 +171,10 @@ jobs: file: ./libs/labelbox/Dockerfile github-token: ${{ secrets.GITHUB_TOKEN }} push: true - + platforms: | linux/amd64 linux/arm64 tags: | - ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} + ${{ env.CONTAINER_IMAGE }}:${{ github.sha }} \ No newline at end of file From da7aeaa8cf27120e0e15335b3e9b786b894f9bc7 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 14:36:23 -0500 Subject: [PATCH 06/13] Removed Coco --- .../labelbox/data/serialization/__init__.py | 1 - .../data/serialization/coco/__init__.py | 1 - .../data/serialization/coco/annotation.py | 78 ----- .../data/serialization/coco/categories.py | 17 -- .../data/serialization/coco/converter.py | 170 ----------- .../labelbox/data/serialization/coco/image.py | 52 ---- .../serialization/coco/instance_dataset.py | 266 ------------------ .../serialization/coco/panoptic_dataset.py | 242 ---------------- .../labelbox/data/serialization/coco/path.py | 9 - .../data/serialization/coco/test_coco.py | 38 --- 10 
files changed, 874 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/__init__.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/annotation.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/categories.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/converter.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/image.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py delete mode 100644 libs/labelbox/src/labelbox/data/serialization/coco/path.py delete mode 100644 libs/labelbox/tests/data/serialization/coco/test_coco.py diff --git a/libs/labelbox/src/labelbox/data/serialization/__init__.py b/libs/labelbox/src/labelbox/data/serialization/__init__.py index 71a9b3443..38cb5edff 100644 --- a/libs/labelbox/src/labelbox/data/serialization/__init__.py +++ b/libs/labelbox/src/labelbox/data/serialization/__init__.py @@ -1,2 +1 @@ from .ndjson import NDJsonConverter -from .coco import COCOConverter diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py b/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py deleted file mode 100644 index 4511e89ee..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .converter import COCOConverter diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py b/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py deleted file mode 100644 index e387cb7d9..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/annotation.py +++ /dev/null @@ -1,78 +0,0 @@ -from typing import Any, Tuple, List, Union -from pathlib import Path -from collections import defaultdict -import warnings - -from ...annotation_types.relationship import RelationshipAnnotation -from ...annotation_types.metrics.confusion_matrix import ConfusionMatrixMetric -from ...annotation_types.metrics.scalar import ScalarMetric -from ...annotation_types.video import VideoMaskAnnotation -from ...annotation_types.annotation import ObjectAnnotation -from ...annotation_types.classification.classification import ( - ClassificationAnnotation, -) - -import numpy as np - -from .path import PathSerializerMixin -from pydantic import BaseModel - - -def rle_decoding(rle_arr: List[int], w: int, h: int) -> np.ndarray: - indices = [] - for idx, cnt in zip(rle_arr[0::2], rle_arr[1::2]): - indices.extend( - list(range(idx - 1, idx + cnt - 1)) - ) # RLE is 1-based index - mask = np.zeros(h * w, dtype=np.uint8) - mask[indices] = 1 - return mask.reshape((w, h)).T - - -def get_annotation_lookup(annotations): - """Get annotations from Label.annotations objects - - Args: - annotations (Label.annotations): Annotations attached to labelbox Label object used as private method - """ - annotation_lookup = defaultdict(list) - for annotation in annotations: - # Provide a default value of None if the attribute doesn't exist - attribute_value = getattr(annotation, "image_id", None) or getattr( - annotation, "name", None - ) - annotation_lookup[attribute_value].append(annotation) - return annotation_lookup - - -class SegmentInfo(BaseModel): - id: int - category_id: int - area: Union[float, int] - bbox: Tuple[float, float, float, float] # [x,y,w,h], - iscrowd: int = 0 - - -class RLE(BaseModel): - counts: List[int] - size: Tuple[int, int] # h,w or w,h? 
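For reference, the COCO run-length decoding deleted with this file is small enough to reproduce standalone. A minimal sketch, assuming only numpy, that mirrors the rle_decoding helper removed above (COCO RLE starts are 1-based, and counts fill the mask column-major); rle_decode is an illustrative name, not part of the SDK:

    import numpy as np

    def rle_decode(rle_arr, w, h):
        # rle_arr alternates (start, count) pairs; starts are 1-based.
        indices = []
        for idx, cnt in zip(rle_arr[0::2], rle_arr[1::2]):
            indices.extend(range(idx - 1, idx + cnt - 1))
        mask = np.zeros(h * w, dtype=np.uint8)
        mask[indices] = 1
        # Runs fill columns first, so shape as (w, h) and transpose to (h, w).
        return mask.reshape((w, h)).T

    # e.g. rle_decode([2, 3], w=2, h=3) marks 1-based pixels 2 through 4 of a 2x3 mask.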
- - -class COCOObjectAnnotation(BaseModel): - # All segmentations for a particular class in an image... - # So each image will have one of these for each class present in the image.. - # Annotations only exist if there is data.. - id: int - image_id: int - category_id: int - segmentation: Union[RLE, List[List[float]]] # [[x1,y1,x2,y2,x3,y3...]] - area: float - bbox: Tuple[float, float, float, float] # [x,y,w,h], - iscrowd: int = 0 - - -class PanopticAnnotation(PathSerializerMixin): - # One to one relationship between image and panoptic annotation - image_id: int - file_name: Path - segments_info: List[SegmentInfo] diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/categories.py b/libs/labelbox/src/labelbox/data/serialization/coco/categories.py deleted file mode 100644 index 60ba30fce..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/categories.py +++ /dev/null @@ -1,17 +0,0 @@ -import sys -from hashlib import md5 - -from pydantic import BaseModel - - -class Categories(BaseModel): - id: int - name: str - supercategory: str - isthing: int = 1 - - -def hash_category_name(name: str) -> int: - return int.from_bytes( - md5(name.encode("utf-8")).hexdigest().encode("utf-8"), "little" - ) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/converter.py b/libs/labelbox/src/labelbox/data/serialization/coco/converter.py deleted file mode 100644 index e270b7573..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/converter.py +++ /dev/null @@ -1,170 +0,0 @@ -from typing import Dict, Any, Union -from pathlib import Path -import os -import warnings - -from ...annotation_types.collection import LabelCollection, LabelGenerator -from ...serialization.coco.instance_dataset import CocoInstanceDataset -from ...serialization.coco.panoptic_dataset import CocoPanopticDataset - - -def create_path_if_not_exists( - path: Union[Path, str], ignore_existing_data=False -): - path = Path(path) - if not path.exists(): - path.mkdir(parents=True, exist_ok=True) - elif not ignore_existing_data and os.listdir(path): - raise ValueError( - f"Directory `{path}`` must be empty. Or set `ignore_existing_data=True`" - ) - return path - - -def validate_path(path: Union[Path, str], name: str): - path = Path(path) - if not path.exists(): - raise ValueError(f"{name} `{path}` must exist") - return path - - -class COCOConverter: - """ - Class for converting between coco and labelbox formats - Note that this class is only compatible with image data. - - Subclasses are currently ignored. - To use subclasses, manually flatten them before using the converter. - """ - - @staticmethod - def serialize_instances( - labels: LabelCollection, - image_root: Union[Path, str], - ignore_existing_data=False, - max_workers=8, - ) -> Dict[str, Any]: - """ - Convert a Labelbox LabelCollection into an mscoco dataset. - This function will only convert masks, polygons, and rectangles. - Masks will be converted into individual instances. - Use deserialize_panoptic to prevent masks from being split apart. - - Args: - labels: A collection of labels to convert - image_root: Where to save images to - ignore_existing_data: Whether or not to raise an exception if images already exist. - This exists only to support detectons panoptic fpn model which requires two mscoco payloads for the same images. - max_workers : Number of workers to process dataset with. A value of 0 will process all data in the main process - Returns: - A dictionary containing labels in the coco object format. 
- """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = create_path_if_not_exists(image_root, ignore_existing_data) - return CocoInstanceDataset.from_common( - labels=labels, image_root=image_root, max_workers=max_workers - ).model_dump() - - @staticmethod - def serialize_panoptic( - labels: LabelCollection, - image_root: Union[Path, str], - mask_root: Union[Path, str], - all_stuff: bool = False, - ignore_existing_data=False, - max_workers: int = 8, - ) -> Dict[str, Any]: - """ - Convert a Labelbox LabelCollection into an mscoco dataset. - This function will only convert masks, polygons, and rectangles. - Masks will be converted into individual instances. - Use deserialize_panoptic to prevent masks from being split apart. - - Args: - labels: A collection of labels to convert - image_root: Where to save images to - mask_root: Where to save segmentation masks to - all_stuff: If rectangle or polygon annotations are encountered, they will be treated as instances. - To convert them to stuff class set `all_stuff=True`. - ignore_existing_data: Whether or not to raise an exception if images already exist. - This exists only to support detectons panoptic fpn model which requires two mscoco payloads for the same images. - max_workers : Number of workers to process dataset with. A value of 0 will process all data in the main process. - Returns: - A dictionary containing labels in the coco panoptic format. - """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = create_path_if_not_exists(image_root, ignore_existing_data) - mask_root = create_path_if_not_exists(mask_root, ignore_existing_data) - return CocoPanopticDataset.from_common( - labels=labels, - image_root=image_root, - mask_root=mask_root, - all_stuff=all_stuff, - max_workers=max_workers, - ).model_dump() - - @staticmethod - def deserialize_panoptic( - json_data: Dict[str, Any], - image_root: Union[Path, str], - mask_root: Union[Path, str], - ) -> LabelGenerator: - """ - Convert coco panoptic data into the labelbox format (as a LabelGenerator). - - Args: - json_data: panoptic data as a dict - image_root: Path to local images that are referenced by the panoptic json - mask_root: Path to local segmentation masks that are referenced by the panoptic json - Returns: - LabelGenerator - """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = validate_path(image_root, "image_root") - mask_root = validate_path(mask_root, "mask_root") - objs = CocoPanopticDataset(**json_data) - gen = objs.to_common(image_root, mask_root) - return LabelGenerator(data=gen) - - @staticmethod - def deserialize_instances( - json_data: Dict[str, Any], image_root: Path - ) -> LabelGenerator: - """ - Convert coco object data into the labelbox format (as a LabelGenerator). 
- - Args: - json_data: coco object data as a dict - image_root: Path to local images that are referenced by the coco object json - Returns: - LabelGenerator - """ - - warnings.warn( - "You are currently utilizing COCOconverter for this action, which will be deprecated in a later release.", - DeprecationWarning, - stacklevel=2, - ) - - image_root = validate_path(image_root, "image_root") - objs = CocoInstanceDataset(**json_data) - gen = objs.to_common(image_root) - return LabelGenerator(data=gen) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/image.py b/libs/labelbox/src/labelbox/data/serialization/coco/image.py deleted file mode 100644 index cef173377..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/image.py +++ /dev/null @@ -1,52 +0,0 @@ -from pathlib import Path - -from typing import Optional, Tuple -from PIL import Image -import imagesize - -from .path import PathSerializerMixin -from ...annotation_types import Label - - -class CocoImage(PathSerializerMixin): - id: int - width: int - height: int - file_name: Path - license: Optional[int] = None - flickr_url: Optional[str] = None - coco_url: Optional[str] = None - - -def get_image_id(label: Label, idx: int) -> int: - if label.data.file_path is not None: - file_name = label.data.file_path.replace(".jpg", "") - if file_name.isdecimal(): - return file_name - return idx - - -def get_image(label: Label, image_root: Path, image_id: str) -> CocoImage: - path = Path(image_root, f"{image_id}.jpg") - if not path.exists(): - im = Image.fromarray(label.data.value) - im.save(path) - w, h = im.size - else: - w, h = imagesize.get(str(path)) - return CocoImage(id=image_id, width=w, height=h, file_name=Path(path.name)) - - -def id_to_rgb(id: int) -> Tuple[int, int, int]: - digits = [] - for _ in range(3): - digits.append(id % 256) - id //= 256 - return digits - - -def rgb_to_id(red: int, green: int, blue: int) -> int: - id = blue * 256 * 256 - id += green * 256 - id += red - return id diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py b/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py deleted file mode 100644 index 5241e596f..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/instance_dataset.py +++ /dev/null @@ -1,266 +0,0 @@ -# https://cocodataset.org/#format-data - -from concurrent.futures import ProcessPoolExecutor, as_completed -from typing import Any, Dict, List, Tuple, Optional -from pathlib import Path - -import numpy as np -from tqdm import tqdm - -from ...annotation_types import ( - ImageData, - MaskData, - Mask, - ObjectAnnotation, - Label, - Polygon, - Point, - Rectangle, -) -from ...annotation_types.collection import LabelCollection -from .categories import Categories, hash_category_name -from .annotation import ( - COCOObjectAnnotation, - RLE, - get_annotation_lookup, - rle_decoding, -) -from .image import CocoImage, get_image, get_image_id -from pydantic import BaseModel - - -def mask_to_coco_object_annotation( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> Optional[COCOObjectAnnotation]: - # This is going to fill any holes into the multipolygon - # If you need to support holes use the panoptic data format - shapely = annotation.value.shapely.simplify(1).buffer(0) - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - # Iterate over polygon once or multiple polygon for each item - area = shapely.area - - return COCOObjectAnnotation( - id=annot_idx, - 
image_id=image_id, - category_id=category_id, - segmentation=[ - np.array(s.exterior.coords).ravel().tolist() - for s in ([shapely] if shapely.type == "Polygon" else shapely.geoms) - ], - area=area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - iscrowd=0, - ) - - -def vector_to_coco_object_annotation( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> COCOObjectAnnotation: - shapely = annotation.value.shapely - xmin, ymin, xmax, ymax = shapely.bounds - segmentation = [] - if isinstance(annotation.value, Polygon): - for point in annotation.value.points: - segmentation.extend([point.x, point.y]) - else: - box = annotation.value - segmentation.extend( - [ - box.start.x, - box.start.y, - box.end.x, - box.start.y, - box.end.x, - box.end.y, - box.start.x, - box.end.y, - ] - ) - - return COCOObjectAnnotation( - id=annot_idx, - image_id=image_id, - category_id=category_id, - segmentation=[segmentation], - area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - iscrowd=0, - ) - - -def rle_to_common( - class_annotations: COCOObjectAnnotation, class_name: str -) -> ObjectAnnotation: - mask = rle_decoding( - class_annotations.segmentation.counts, - *class_annotations.segmentation.size[::-1], - ) - return ObjectAnnotation( - name=class_name, - value=Mask(mask=MaskData.from_2D_arr(mask), color=[1, 1, 1]), - ) - - -def segmentations_to_common( - class_annotations: COCOObjectAnnotation, class_name: str -) -> List[ObjectAnnotation]: - # Technically it is polygons. But the key in coco is called segmentations.. - annotations = [] - for points in class_annotations.segmentation: - annotations.append( - ObjectAnnotation( - name=class_name, - value=Polygon( - points=[ - Point(x=points[i], y=points[i + 1]) - for i in range(0, len(points), 2) - ] - ), - ) - ) - return annotations - - -def object_annotation_to_coco( - annotation: ObjectAnnotation, - annot_idx: int, - image_id: int, - category_id: int, -) -> Optional[COCOObjectAnnotation]: - if isinstance(annotation.value, Mask): - return mask_to_coco_object_annotation( - annotation, annot_idx, image_id, category_id - ) - elif isinstance(annotation.value, (Polygon, Rectangle)): - return vector_to_coco_object_annotation( - annotation, annot_idx, image_id, category_id - ) - else: - return None - - -def process_label( - label: Label, idx: int, image_root: str, max_annotations_per_image=10000 -) -> Tuple[np.ndarray, List[COCOObjectAnnotation], Dict[str, str]]: - annot_idx = idx * max_annotations_per_image - image_id = get_image_id(label, idx) - image = get_image(label, image_root, image_id) - coco_annotations = [] - annotation_lookup = get_annotation_lookup(label.annotations) - categories = {} - for class_name in annotation_lookup: - for annotation in annotation_lookup[class_name]: - category_id = categories.get(annotation.name) or hash_category_name( - annotation.name - ) - coco_annotation = object_annotation_to_coco( - annotation, annot_idx, image_id, category_id - ) - if coco_annotation is not None: - coco_annotations.append(coco_annotation) - if annotation.name not in categories: - categories[annotation.name] = category_id - annot_idx += 1 - - return image, coco_annotations, categories - - -class CocoInstanceDataset(BaseModel): - info: Dict[str, Any] = {} - images: List[CocoImage] - annotations: List[COCOObjectAnnotation] - categories: List[Categories] - - @classmethod - def from_common( - cls, labels: LabelCollection, image_root: Path, max_workers=8 - ): - all_coco_annotations = [] - categories = {} - images 
= [] - futures = [] - coco_categories = {} - - if max_workers: - with ProcessPoolExecutor(max_workers=max_workers) as exc: - futures = [ - exc.submit(process_label, label, idx, image_root) - for idx, label in enumerate(labels) - ] - results = [ - future.result() for future in tqdm(as_completed(futures)) - ] - else: - results = [ - process_label(label, idx, image_root) - for idx, label in enumerate(labels) - ] - - for result in results: - images.append(result[0]) - all_coco_annotations.extend(result[1]) - coco_categories.update(result[2]) - - category_mapping = { - category_id: idx + 1 - for idx, category_id in enumerate(coco_categories.values()) - } - categories = [ - Categories( - id=category_mapping[idx], - name=name, - supercategory="all", - isthing=1, - ) - for name, idx in coco_categories.items() - ] - for annot in all_coco_annotations: - annot.category_id = category_mapping[annot.category_id] - - return CocoInstanceDataset( - info={"image_root": image_root}, - images=images, - annotations=all_coco_annotations, - categories=categories, - ) - - def to_common(self, image_root): - category_lookup = { - category.id: category for category in self.categories - } - annotation_lookup = get_annotation_lookup(self.annotations) - - for image in self.images: - im_path = Path(image_root, image.file_name) - if not im_path.exists(): - raise ValueError( - f"Cannot find file {im_path}. Make sure `image_root` is set properly" - ) - - data = ImageData(file_path=str(im_path)) - annotations = [] - for class_annotations in annotation_lookup[image.id]: - if isinstance(class_annotations.segmentation, RLE): - annotations.append( - rle_to_common( - class_annotations, - category_lookup[class_annotations.category_id].name, - ) - ) - elif isinstance(class_annotations.segmentation, list): - annotations.extend( - segmentations_to_common( - class_annotations, - category_lookup[class_annotations.category_id].name, - ) - ) - yield Label(data=data, annotations=annotations) diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py b/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py deleted file mode 100644 index cbb410548..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/panoptic_dataset.py +++ /dev/null @@ -1,242 +0,0 @@ -from concurrent.futures import ProcessPoolExecutor, as_completed -from typing import Dict, Any, List, Union -from pathlib import Path - -from tqdm import tqdm -import numpy as np -from PIL import Image - -from ...annotation_types.geometry import Polygon, Rectangle -from ...annotation_types import Label -from ...annotation_types.geometry.mask import Mask -from ...annotation_types.annotation import ObjectAnnotation -from ...annotation_types.data.raster import MaskData, ImageData -from ...annotation_types.collection import LabelCollection -from .categories import Categories, hash_category_name -from .image import CocoImage, get_image, get_image_id, id_to_rgb -from .annotation import PanopticAnnotation, SegmentInfo, get_annotation_lookup -from pydantic import BaseModel - - -def vector_to_coco_segment_info( - canvas: np.ndarray, - annotation: ObjectAnnotation, - annotation_idx: int, - image: CocoImage, - category_id: int, -): - shapely = annotation.value.shapely - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - canvas = annotation.value.draw( - height=image.height, - width=image.width, - canvas=canvas, - color=id_to_rgb(annotation_idx), - ) - - return SegmentInfo( - id=annotation_idx, - category_id=category_id, - 
area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - ), canvas - - -def mask_to_coco_segment_info( - canvas: np.ndarray, annotation, annotation_idx: int, category_id -): - color = id_to_rgb(annotation_idx) - mask = annotation.value.draw(color=color) - shapely = annotation.value.shapely - if shapely.is_empty: - return - - xmin, ymin, xmax, ymax = shapely.bounds - canvas = np.where(canvas == (0, 0, 0), mask, canvas) - return SegmentInfo( - id=annotation_idx, - category_id=category_id, - area=shapely.area, - bbox=[xmin, ymin, xmax - xmin, ymax - ymin], - ), canvas - - -def process_label( - label: Label, idx: Union[int, str], image_root, mask_root, all_stuff=False -): - """ - Masks become stuff - Polygon and rectangle become thing - """ - annotations = get_annotation_lookup(label.annotations) - image_id = get_image_id(label, idx) - image = get_image(label, image_root, image_id) - canvas = np.zeros((image.height, image.width, 3)) - - segments = [] - categories = {} - is_thing = {} - - for class_idx, class_name in enumerate(annotations): - for annotation_idx, annotation in enumerate(annotations[class_name]): - categories[annotation.name] = hash_category_name(annotation.name) - if isinstance(annotation.value, Mask): - coco_segment_info = mask_to_coco_segment_info( - canvas, - annotation, - class_idx + 1, - categories[annotation.name], - ) - - if coco_segment_info is None: - # Filter out empty masks - continue - - segment, canvas = coco_segment_info - segments.append(segment) - is_thing[annotation.name] = 0 - - elif isinstance(annotation.value, (Polygon, Rectangle)): - coco_vector_info = vector_to_coco_segment_info( - canvas, - annotation, - annotation_idx=(class_idx if all_stuff else annotation_idx) - + 1, - image=image, - category_id=categories[annotation.name], - ) - - if coco_vector_info is None: - # Filter out empty annotations - continue - - segment, canvas = coco_vector_info - segments.append(segment) - is_thing[annotation.name] = 1 - int(all_stuff) - - mask_file = str(image.file_name).replace(".jpg", ".png") - mask_file = Path(mask_root, mask_file) - Image.fromarray(canvas.astype(np.uint8)).save(mask_file) - return ( - image, - PanopticAnnotation( - image_id=image_id, - file_name=Path(mask_file.name), - segments_info=segments, - ), - categories, - is_thing, - ) - - -class CocoPanopticDataset(BaseModel): - info: Dict[str, Any] = {} - images: List[CocoImage] - annotations: List[PanopticAnnotation] - categories: List[Categories] - - @classmethod - def from_common( - cls, - labels: LabelCollection, - image_root, - mask_root, - all_stuff, - max_workers=8, - ): - all_coco_annotations = [] - coco_categories = {} - coco_things = {} - images = [] - - if max_workers: - with ProcessPoolExecutor(max_workers=max_workers) as exc: - futures = [ - exc.submit( - process_label, - label, - idx, - image_root, - mask_root, - all_stuff, - ) - for idx, label in enumerate(labels) - ] - results = [ - future.result() for future in tqdm(as_completed(futures)) - ] - else: - results = [ - process_label(label, idx, image_root, mask_root, all_stuff) - for idx, label in enumerate(labels) - ] - - for result in results: - images.append(result[0]) - all_coco_annotations.append(result[1]) - coco_categories.update(result[2]) - coco_things.update(result[3]) - - category_mapping = { - category_id: idx + 1 - for idx, category_id in enumerate(coco_categories.values()) - } - categories = [ - Categories( - id=category_mapping[idx], - name=name, - supercategory="all", - isthing=coco_things.get(name, 1), - ) - for 
name, idx in coco_categories.items() - ] - - for annot in all_coco_annotations: - for segment in annot.segments_info: - segment.category_id = category_mapping[segment.category_id] - - return CocoPanopticDataset( - info={"image_root": image_root, "mask_root": mask_root}, - images=images, - annotations=all_coco_annotations, - categories=categories, - ) - - def to_common(self, image_root: Path, mask_root: Path): - category_lookup = { - category.id: category for category in self.categories - } - annotation_lookup = { - annotation.image_id: annotation for annotation in self.annotations - } - for image in self.images: - annotations = [] - annotation = annotation_lookup[image.id] - - im_path = Path(image_root, image.file_name) - if not im_path.exists(): - raise ValueError( - f"Cannot find file {im_path}. Make sure `image_root` is set properly" - ) - if not str(annotation.file_name).endswith(".png"): - raise ValueError( - f"COCO masks must be stored as png files and their extension must be `.png`. Found {annotation.file_name}" - ) - mask = MaskData( - file_path=str(Path(mask_root, annotation.file_name)) - ) - - for segmentation in annotation.segments_info: - category = category_lookup[segmentation.category_id] - annotations.append( - ObjectAnnotation( - name=category.name, - value=Mask(mask=mask, color=id_to_rgb(segmentation.id)), - ) - ) - data = ImageData(file_path=str(im_path)) - yield Label(data=data, annotations=annotations) - del annotation_lookup[image.id] diff --git a/libs/labelbox/src/labelbox/data/serialization/coco/path.py b/libs/labelbox/src/labelbox/data/serialization/coco/path.py deleted file mode 100644 index c3be84f31..000000000 --- a/libs/labelbox/src/labelbox/data/serialization/coco/path.py +++ /dev/null @@ -1,9 +0,0 @@ -from pathlib import Path -from pydantic import BaseModel, model_serializer - - -class PathSerializerMixin(BaseModel): - @model_serializer(mode="wrap") - def serialize_model(self, handler): - res = handler(self) - return {k: str(v) if isinstance(v, Path) else v for k, v in res.items()} diff --git a/libs/labelbox/tests/data/serialization/coco/test_coco.py b/libs/labelbox/tests/data/serialization/coco/test_coco.py deleted file mode 100644 index a7c733ce5..000000000 --- a/libs/labelbox/tests/data/serialization/coco/test_coco.py +++ /dev/null @@ -1,38 +0,0 @@ -import json -from pathlib import Path - -from labelbox.data.serialization.coco import COCOConverter - -COCO_ASSETS_DIR = "tests/data/assets/coco" - - -def run_instances(tmpdir): - instance_json = json.load(open(Path(COCO_ASSETS_DIR, "instances.json"))) - res = COCOConverter.deserialize_instances( - instance_json, Path(COCO_ASSETS_DIR, "images") - ) - back = COCOConverter.serialize_instances( - res, - Path(tmpdir), - ) - - -def test_rle_objects(tmpdir): - rle_json = json.load(open(Path(COCO_ASSETS_DIR, "rle.json"))) - res = COCOConverter.deserialize_instances( - rle_json, Path(COCO_ASSETS_DIR, "images") - ) - back = COCOConverter.serialize_instances(res, tmpdir) - - -def test_panoptic(tmpdir): - panoptic_json = json.load(open(Path(COCO_ASSETS_DIR, "panoptic.json"))) - image_dir, mask_dir = [ - Path(COCO_ASSETS_DIR, dir_name) for dir_name in ["images", "masks"] - ] - res = COCOConverter.deserialize_panoptic(panoptic_json, image_dir, mask_dir) - back = COCOConverter.serialize_panoptic( - res, - Path(f"/{tmpdir}/images_panoptic"), - Path(f"/{tmpdir}/masks_panoptic"), - ) From 14a1d3d9189545bf3ee6734ca5f452ed65c22598 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 
Sep 2024 10:51:40 -0500 Subject: [PATCH 07/13] added basic linting from ruff --- libs/labelbox/pyproject.toml | 4 ++++ .../labelbox/data/annotation_types/data/raster.py | 6 ++++-- .../src/labelbox/data/annotation_types/data/text.py | 2 +- .../src/labelbox/data/annotation_types/data/video.py | 2 +- .../src/labelbox/data/annotation_types/video.py | 2 +- .../src/labelbox/schema/data_row_metadata.py | 9 ++++----- libs/labelbox/src/labelbox/schema/export_params.py | 4 ++-- .../schema/internal/descriptor_file_creator.py | 2 +- .../tests/data/annotation_types/test_collection.py | 2 +- libs/labelbox/tests/integration/test_data_rows.py | 7 ++++--- libs/labelbox/tests/integration/test_ephemeral.py | 4 ++-- libs/labelbox/tests/integration/test_ontology.py | 12 ++++++------ libs/labelbox/tests/integration/test_toggle_mal.py | 6 +++--- libs/labelbox/tests/unit/test_unit_ontology.py | 4 ++-- 14 files changed, 36 insertions(+), 30 deletions(-) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index 58ce3410a..2e9a81a75 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -74,6 +74,10 @@ dev-dependencies = [ [tool.ruff] line-length = 80 +[tool.ruff.lint] +ignore = ["F", "E712", "E721", "E722"] +exclude = ["**/__init__.py"] + [tool.rye.scripts] unit = "pytest tests/unit" # https://github.com/Labelbox/labelbox-python/blob/7c84fdffbc14fd1f69d2a6abdcc0087dc557fa4e/Makefile diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py index ba4c6485f..fd76d9a66 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py @@ -172,7 +172,7 @@ def validate_args(self, values): uid = self.uid global_key = self.global_key if ( - uid == file_path == im_bytes == url == global_key == None + uid == file_path == im_bytes == url == global_key is None and arr is None ): raise ValueError( @@ -191,7 +191,9 @@ def validate_args(self, values): return self def __repr__(self) -> str: - symbol_or_none = lambda data: "..." if data is not None else None + def symbol_or_none(data): + return "..." if data is not None else None + return ( f"{self.__class__.__name__}(im_bytes={symbol_or_none(self.im_bytes)}," f"file_path={self.file_path}," diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py b/libs/labelbox/src/labelbox/data/annotation_types/data/text.py index fe4c222d3..3926a0832 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/text.py @@ -101,7 +101,7 @@ def validate_date(self, values): url = self.url uid = self.uid global_key = self.global_key - if uid == file_path == text == url == global_key == None: + if uid == file_path == text == url == global_key is None: raise ValueError( "One of `file_path`, `text`, `uid`, `global_key` or `url` required." 
) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py b/libs/labelbox/src/labelbox/data/annotation_types/data/video.py index 581801036..0f40911d8 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/video.py @@ -159,7 +159,7 @@ def validate_data(self): uid = self.uid global_key = self.global_key - if uid == file_path == frames == url == global_key == None: + if uid == file_path == frames == url == global_key is None: raise ValueError( "One of `file_path`, `frames`, `uid`, `global_key` or `url` required." ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/video.py b/libs/labelbox/src/labelbox/data/annotation_types/video.py index cfebd7a1f..5a93704c8 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/video.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/video.py @@ -125,7 +125,7 @@ class MaskFrame(_CamelCaseMixin, BaseModel): def validate_args(self, values): im_bytes = self.im_bytes instance_uri = self.instance_uri - if im_bytes == instance_uri == None: + if im_bytes == instance_uri is None: raise ValueError("One of `instance_uri`, `im_bytes` required.") return self diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py index 288459a89..5883b767d 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py +++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py @@ -803,13 +803,13 @@ def _convert_metadata_field(metadata_field): if isinstance(metadata_field, DataRowMetadataField): return metadata_field elif isinstance(metadata_field, dict): - if not "value" in metadata_field: + if "value" not in metadata_field: raise ValueError( f"Custom metadata field '{metadata_field}' must have a 'value' key" ) if ( - not "schema_id" in metadata_field - and not "name" in metadata_field + "schema_id" not in metadata_field + and "name" not in metadata_field ): raise ValueError( f"Custom metadata field '{metadata_field}' must have either 'schema_id' or 'name' key" @@ -954,9 +954,8 @@ def _validate_custom_schema_by_name( def _batch_items(iterable: List[Any], size: int) -> Generator[Any, None, None]: - l = len(iterable) for ndx in range(0, l, size): - yield iterable[ndx : min(ndx + size, l)] + yield iterable[ndx : min(ndx + size, len(iterable))] def _batch_operations( diff --git a/libs/labelbox/src/labelbox/schema/export_params.py b/libs/labelbox/src/labelbox/schema/export_params.py index b15bc2828..d5024bd30 100644 --- a/libs/labelbox/src/labelbox/schema/export_params.py +++ b/libs/labelbox/src/labelbox/schema/export_params.py @@ -2,8 +2,6 @@ from typing import Optional, List -EXPORT_LIMIT = 30 - from labelbox.schema.media_type import MediaType if sys.version_info >= (3, 8): @@ -11,6 +9,8 @@ else: from typing_extensions import TypedDict +EXPORT_LIMIT = 30 + class DataRowParams(TypedDict): data_row_details: Optional[bool] diff --git a/libs/labelbox/src/labelbox/schema/internal/descriptor_file_creator.py b/libs/labelbox/src/labelbox/schema/internal/descriptor_file_creator.py index ce3ce4b35..9f2ea72a0 100644 --- a/libs/labelbox/src/labelbox/schema/internal/descriptor_file_creator.py +++ b/libs/labelbox/src/labelbox/schema/internal/descriptor_file_creator.py @@ -161,7 +161,7 @@ def check_message_keys(message): ] ) for key in message.keys(): - if not key in accepted_message_keys: + if key not in accepted_message_keys: raise KeyError( f"Invalid {key} key found! 
Accepted keys in messages list is {accepted_message_keys}" ) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 9deddc3c8..1c9cd669e 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -114,7 +114,7 @@ def test_adding_to_dataset(signer): assert label.data.url != uuid generated_label = next(generator) assert generated_label.data.url == uuid - assert generated_label.data.external_id != None + assert generated_label.data.external_id is not None assert generated_label.data.uid == dataset.uid assert label.data.url == uuid diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index 7f69c2995..f9770c274 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -677,9 +677,10 @@ def test_data_row_update( pdf_url = "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf" tileLayerUrl = "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json" data_row.update(row_data={"pdfUrl": pdf_url, "tileLayerUrl": tileLayerUrl}) - custom_check = ( - lambda data_row: data_row.row_data and "pdfUrl" not in data_row.row_data - ) + + def custom_check(data_row): + return data_row.row_data and "pdfUrl" not in data_row.row_data + data_row = wait_for_data_row_processing( client, data_row, custom_check=custom_check ) diff --git a/libs/labelbox/tests/integration/test_ephemeral.py b/libs/labelbox/tests/integration/test_ephemeral.py index a23572fdf..3c4fc62e4 100644 --- a/libs/labelbox/tests/integration/test_ephemeral.py +++ b/libs/labelbox/tests/integration/test_ephemeral.py @@ -7,7 +7,7 @@ reason="This test only runs in EPHEMERAL environment", ) def test_org_and_user_setup(client, ephmeral_client): - assert type(client) == ephmeral_client + assert type(client) is ephmeral_client assert client.admin_client assert client.api_key != client.admin_client.api_key @@ -22,4 +22,4 @@ def test_org_and_user_setup(client, ephmeral_client): reason="This test does not run in EPHEMERAL environment", ) def test_integration_client(client, integration_client): - assert type(client) == integration_client + assert type(client) is integration_client diff --git a/libs/labelbox/tests/integration/test_ontology.py b/libs/labelbox/tests/integration/test_ontology.py index 91ef74a39..bf70536d0 100644 --- a/libs/labelbox/tests/integration/test_ontology.py +++ b/libs/labelbox/tests/integration/test_ontology.py @@ -13,7 +13,7 @@ def test_feature_schema_is_not_archived(client, ontology): result = client.is_feature_schema_archived( ontology.uid, feature_schema_to_check["featureSchemaId"] ) - assert result == False + assert result is False def test_feature_schema_is_archived(client, configured_project_with_label): @@ -23,10 +23,10 @@ def test_feature_schema_is_archived(client, configured_project_with_label): result = client.delete_feature_schema_from_ontology( ontology.uid, feature_schema_id ) - assert result.archived == True and result.deleted == False + assert result.archived is True and result.deleted is False assert ( client.is_feature_schema_archived(ontology.uid, feature_schema_id) - == True + is True ) @@ -58,8 +58,8 @@ def test_delete_tool_feature_from_ontology(client, ontology): result = client.delete_feature_schema_from_ontology( ontology.uid, 
feature_schema_to_delete["featureSchemaId"] ) - assert result.deleted == True - assert result.archived == False + assert result.deleted is True + assert result.archived is False updatedOntology = client.get_ontology(ontology.uid) assert len(updatedOntology.normalized["tools"]) == 1 @@ -300,7 +300,7 @@ def test_unarchive_feature_schema_node(client, ontology): result = client.unarchive_feature_schema_node( ontology.uid, feature_schema_to_unarchive["featureSchemaId"] ) - assert result == None + assert result is None def test_unarchive_feature_schema_node_for_non_existing_feature_schema( diff --git a/libs/labelbox/tests/integration/test_toggle_mal.py b/libs/labelbox/tests/integration/test_toggle_mal.py index 41dfbe395..566c4210c 100644 --- a/libs/labelbox/tests/integration/test_toggle_mal.py +++ b/libs/labelbox/tests/integration/test_toggle_mal.py @@ -1,9 +1,9 @@ def test_enable_model_assisted_labeling(project): response = project.enable_model_assisted_labeling() - assert response == True + assert response is True response = project.enable_model_assisted_labeling(True) - assert response == True + assert response is True response = project.enable_model_assisted_labeling(False) - assert response == False + assert response is False diff --git a/libs/labelbox/tests/unit/test_unit_ontology.py b/libs/labelbox/tests/unit/test_unit_ontology.py index 0566ad623..4b4958beb 100644 --- a/libs/labelbox/tests/unit/test_unit_ontology.py +++ b/libs/labelbox/tests/unit/test_unit_ontology.py @@ -197,7 +197,7 @@ def test_add_ontology_tool() -> None: assert len(o.tools) == 2 for tool in o.tools: - assert type(tool) == Tool + assert type(tool) is Tool with pytest.raises(InconsistentOntologyException) as exc: o.add_tool(Tool(tool=Tool.Type.BBOX, name="bounding box")) @@ -217,7 +217,7 @@ def test_add_ontology_classification() -> None: assert len(o.classifications) == 2 for classification in o.classifications: - assert type(classification) == Classification + assert type(classification) is Classification with pytest.raises(InconsistentOntologyException) as exc: o.add_classification( From 2658a106b2a53bb633e1ec091f3f6a5577c3def4 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 Sep 2024 10:57:19 -0500 Subject: [PATCH 08/13] added a few more rules --- libs/labelbox/pyproject.toml | 5 +++-- libs/labelbox/src/labelbox/client.py | 6 +++--- .../labelbox/data/metrics/confusion_matrix/calculation.py | 2 +- libs/labelbox/src/labelbox/data/metrics/iou/calculation.py | 2 +- libs/labelbox/src/labelbox/orm/db_object.py | 2 +- .../export/streamable/test_export_embeddings_streamable.py | 2 +- libs/labelbox/tests/integration/test_benchmark.py | 6 +++--- libs/labelbox/tests/integration/test_data_rows.py | 2 +- 8 files changed, 14 insertions(+), 13 deletions(-) diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml index 2e9a81a75..6c57473a7 100644 --- a/libs/labelbox/pyproject.toml +++ b/libs/labelbox/pyproject.toml @@ -75,7 +75,7 @@ dev-dependencies = [ line-length = 80 [tool.ruff.lint] -ignore = ["F", "E712", "E721", "E722"] +ignore = ["F", "E722"] exclude = ["**/__init__.py"] [tool.rye.scripts] @@ -93,9 +93,10 @@ unit = "pytest tests/unit" # LABELBOX_TEST_BASE_URL="http://host.docker.internal:8080" \ integration = { cmd = "pytest tests/integration" } data = { cmd = "pytest tests/data" } +rye-lint = "rye lint" rye-fmt-check = "rye fmt --check" mypy-lint = "mypy src --pretty --show-error-codes --non-interactive --install-types" -lint = { chain = ["mypy-lint", 
"rye-fmt-check"] } +lint = { chain = ["rye-fmt-check", "mypy-lint", "rye-lint"] } test = { chain = ["lint", "unit", "integration"] } [tool.hatch.metadata] diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index b0b5a1407..8361ba4d7 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -626,7 +626,7 @@ def _get_all(self, db_object_type, where, filter_deleted=True): An iterable of `db_object_type` instances. """ if filter_deleted: - not_deleted = db_object_type.deleted == False + not_deleted = db_object_type.deleted is False where = not_deleted if where is None else where & not_deleted query_str, params = query.get_all(db_object_type, where) @@ -2297,11 +2297,11 @@ def delete_feature_schema_from_ontology( if response.status_code == requests.codes.ok: response_json = response.json() - if response_json["archived"] == True: + if response_json["archived"] is True: logger.info( "Feature schema was archived from the ontology because it had associated labels." ) - elif response_json["deleted"] == True: + elif response_json["deleted"] is True: logger.info( "Feature schema was successfully removed from the ontology" ) diff --git a/libs/labelbox/src/labelbox/data/metrics/confusion_matrix/calculation.py b/libs/labelbox/src/labelbox/data/metrics/confusion_matrix/calculation.py index 938e17f65..83410a540 100644 --- a/libs/labelbox/src/labelbox/data/metrics/confusion_matrix/calculation.py +++ b/libs/labelbox/src/labelbox/data/metrics/confusion_matrix/calculation.py @@ -130,7 +130,7 @@ def classification_confusion_matrix( prediction, ground_truth = predictions[0], ground_truths[0] - if type(prediction) != type(ground_truth): + if type(prediction) is not type(ground_truth): raise TypeError( "Classification features must be the same type to compute agreement. " f"Found `{type(prediction)}` and `{type(ground_truth)}`" diff --git a/libs/labelbox/src/labelbox/data/metrics/iou/calculation.py b/libs/labelbox/src/labelbox/data/metrics/iou/calculation.py index 2a376d3fe..d0237963c 100644 --- a/libs/labelbox/src/labelbox/data/metrics/iou/calculation.py +++ b/libs/labelbox/src/labelbox/data/metrics/iou/calculation.py @@ -209,7 +209,7 @@ def classification_miou( prediction, ground_truth = predictions[0], ground_truths[0] - if type(prediction) != type(ground_truth): + if type(prediction) is not type(ground_truth): raise TypeError( "Classification features must be the same type to compute agreement. 
" f"Found `{type(prediction)}` and `{type(ground_truth)}`" diff --git a/libs/labelbox/src/labelbox/orm/db_object.py b/libs/labelbox/src/labelbox/orm/db_object.py index b210a8a5b..684968364 100644 --- a/libs/labelbox/src/labelbox/orm/db_object.py +++ b/libs/labelbox/src/labelbox/orm/db_object.py @@ -177,7 +177,7 @@ def _to_many(self, where=None, order_by=None): ) if rel.filter_deleted: - not_deleted = rel.destination_type.deleted == False + not_deleted = rel.destination_type.deleted is False where = not_deleted if where is None else where & not_deleted query_string, params = query.relationship( diff --git a/libs/labelbox/tests/data/export/streamable/test_export_embeddings_streamable.py b/libs/labelbox/tests/data/export/streamable/test_export_embeddings_streamable.py index 071acbb5b..25e58e2dc 100644 --- a/libs/labelbox/tests/data/export/streamable/test_export_embeddings_streamable.py +++ b/libs/labelbox/tests/data/export/streamable/test_export_embeddings_streamable.py @@ -86,6 +86,6 @@ def test_export_embeddings_custom( if emb["id"] == embedding.id: assert emb["name"] == embedding.name assert emb["dimensions"] == embedding.dims - assert emb["is_custom"] == True + assert emb["is_custom"] is True assert len(emb["values"]) == 1 assert emb["values"][0]["value"] == vector diff --git a/libs/labelbox/tests/integration/test_benchmark.py b/libs/labelbox/tests/integration/test_benchmark.py index c10542bda..661f83bd7 100644 --- a/libs/labelbox/tests/integration/test_benchmark.py +++ b/libs/labelbox/tests/integration/test_benchmark.py @@ -1,17 +1,17 @@ def test_benchmark(configured_project_with_label): project, _, data_row, label = configured_project_with_label assert set(project.benchmarks()) == set() - assert label.is_benchmark_reference == False + assert label.is_benchmark_reference is False benchmark = label.create_benchmark() assert set(project.benchmarks()) == {benchmark} assert benchmark.reference_label() == label # Refresh label data to check it's benchmark reference label = list(data_row.labels())[0] - assert label.is_benchmark_reference == True + assert label.is_benchmark_reference is True benchmark.delete() assert set(project.benchmarks()) == set() # Refresh label data to check it's benchmark reference label = list(data_row.labels())[0] - assert label.is_benchmark_reference == False + assert label.is_benchmark_reference is False diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index f9770c274..277f05d71 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -120,7 +120,7 @@ def make_metadata_fields_dict(): def test_get_data_row_by_global_key(data_row_and_global_key, client, rand_gen): _, global_key = data_row_and_global_key data_row = client.get_data_row_by_global_key(global_key) - assert type(data_row) == DataRow + assert type(data_row) is DataRow assert data_row.global_key == global_key From d9d4564c475ddb4e420e13804a7b4e3e465ab940 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 Sep 2024 21:00:22 -0500 Subject: [PATCH 09/13] fix merge conflict --- .../test_bulk_import_request.py | 210 ------------------ 1 file changed, 210 deletions(-) delete mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py deleted file mode 100644 index 
b2289503e..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ /dev/null @@ -1,210 +0,0 @@ -import uuid -from labelbox import parser, Project -import pytest - -from labelbox.data.serialization import NDJsonConverter -from labelbox.exceptions import MALValidationError, UuidError -from labelbox.schema.bulk_import_request import BulkImportRequest -from labelbox.schema.enums import BulkImportRequestState - -""" -- Here we only want to check that the uploads are calling the validation -- Then with unit tests we can check the types of errors raised -""" -# TODO: remove library once bulk import requests are removed - - -@pytest.mark.order(1) -def test_create_from_url(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_file(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - with pytest.raises(MALValidationError): - module_project.upload_annotations( - name=name, annotations=url, validate=True - ) - # Schema ids shouldn't match - - -def test_create_from_objects( - module_project: Project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_create_from_local_file( - tmp_path, predictions, module_project, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - file_name = f"{name}.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - parser.dump(predictions, f) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=str(file_path), validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_get(client, module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - bulk_import_request = BulkImportRequest.from_name( - client, project_id=module_project.uid, name=name - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is 
None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_ndjson(tmp_path, module_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - -def test_validate_ndjson_uuid(tmp_path, module_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - uid = str(uuid.uuid4()) - repeat_uuid[0]["uuid"] = uid - repeat_uuid[1]["uuid"] = uid - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - -@pytest.mark.skip( - "Slow test and uses a deprecated api endpoint for annotation imports" -) -def test_wait_till_done(rectangle_inference, project): - name = str(uuid.uuid4()) - url = project.client.upload_data( - content=parser.dumps(rectangle_inference), sign=True - ) - bulk_import_request = project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert len(bulk_import_request.inputs) == 1 - bulk_import_request.wait_until_done() - assert bulk_import_request.state == BulkImportRequestState.FINISHED - - # Check that the status files are being returned as expected - assert len(bulk_import_request.errors) == 0 - assert len(bulk_import_request.inputs) == 1 - assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] - assert len(bulk_import_request.statuses) == 1 - assert bulk_import_request.statuses[0]["status"] == "SUCCESS" - assert ( - bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] - ) - - -def test_project_bulk_import_requests(module_project, predictions): - result = module_project.bulk_import_requests() - assert len(list(result)) == 0 - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - result = module_project.bulk_import_requests() - assert len(list(result)) == 3 - - -def test_delete(module_project, predictions): - name = str(uuid.uuid4()) - - bulk_import_requests = module_project.bulk_import_requests() - [ - bulk_import_request.delete() - for bulk_import_request in bulk_import_requests - ] - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 1 - - bulk_import_request.delete() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 0 From 0c7d9be007d466935975630e039f098bad8b6fb0 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 Sep 2024 21:14:54 -0500 Subject: [PATCH 
10/13] fix lint test --- libs/labelbox/tests/integration/test_data_rows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index 277f05d71..8ec6b20c3 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -1024,9 +1024,9 @@ def test_data_row_bulk_creation_with_same_global_keys( task.wait_till_done() assert task.status == "COMPLETE" - assert type(task.failed_data_rows) is list + assert isinstance(task.failed_data_rows, list) assert len(task.failed_data_rows) == 1 - assert type(task.created_data_rows) is list + assert isinstance(task.created_data_rows, list) assert len(task.created_data_rows) == 1 assert ( task.failed_data_rows[0]["message"] From 7209279a433225b48aa71d24fb3da5cdd33495c7 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 17 Sep 2024 22:20:08 -0500 Subject: [PATCH 11/13] fix tests --- libs/labelbox/src/labelbox/client.py | 2 +- libs/labelbox/src/labelbox/orm/db_object.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 8361ba4d7..3304ea1af 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -627,7 +627,7 @@ def _get_all(self, db_object_type, where, filter_deleted=True): """ if filter_deleted: not_deleted = db_object_type.deleted is False - where = not_deleted if where is None else where & not_deleted + where = (not_deleted if where is None else where) and not_deleted query_str, params = query.get_all(db_object_type, where) return PaginatedCollection( diff --git a/libs/labelbox/src/labelbox/orm/db_object.py b/libs/labelbox/src/labelbox/orm/db_object.py index 684968364..b6843213c 100644 --- a/libs/labelbox/src/labelbox/orm/db_object.py +++ b/libs/labelbox/src/labelbox/orm/db_object.py @@ -178,7 +178,7 @@ def _to_many(self, where=None, order_by=None): if rel.filter_deleted: not_deleted = rel.destination_type.deleted is False - where = not_deleted if where is None else where & not_deleted + where = (not_deleted if where is None else where) and not_deleted query_string, params = query.relationship( self.source if self.filter_on_id else type(self.source), From e02640a2bc8a2582b364b79201bc345f9220c39b Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 10:11:41 -0500 Subject: [PATCH 12/13] fixed error --- libs/labelbox/src/labelbox/schema/data_row_metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py index 5883b767d..d6c50b975 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py +++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py @@ -954,7 +954,7 @@ def _validate_custom_schema_by_name( def _batch_items(iterable: List[Any], size: int) -> Generator[Any, None, None]: - for ndx in range(0, l, size): + for ndx in range(0, len(iterable), size): yield iterable[ndx : min(ndx + size, len(iterable))] From 972b79a14d32ea40f7825d759091f32756565028 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:41:21 -0500 Subject: [PATCH 13/13] Added an ignore rule --- libs/labelbox/src/labelbox/client.py | 4 ++-- libs/labelbox/src/labelbox/orm/db_object.py | 4 
++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py
index 3304ea1af..441c12422 100644
--- a/libs/labelbox/src/labelbox/client.py
+++ b/libs/labelbox/src/labelbox/client.py
@@ -626,8 +626,8 @@ def _get_all(self, db_object_type, where, filter_deleted=True):
             An iterable of `db_object_type` instances.
         """
         if filter_deleted:
-            not_deleted = db_object_type.deleted is False
-            where = (not_deleted if where is None else where) and not_deleted
+            not_deleted = db_object_type.deleted == False  # noqa: E712 Needed for the bitwise operator to combine comparisons
+            where = not_deleted if where is None else where & not_deleted
         query_str, params = query.get_all(db_object_type, where)
         return PaginatedCollection(

diff --git a/libs/labelbox/src/labelbox/orm/db_object.py b/libs/labelbox/src/labelbox/orm/db_object.py
index b6843213c..7f42d7b1e 100644
--- a/libs/labelbox/src/labelbox/orm/db_object.py
+++ b/libs/labelbox/src/labelbox/orm/db_object.py
@@ -177,8 +177,8 @@ def _to_many(self, where=None, order_by=None):
             )
         if rel.filter_deleted:
-            not_deleted = rel.destination_type.deleted is False
-            where = (not_deleted if where is None else where) and not_deleted
+            not_deleted = rel.destination_type.deleted == False  # noqa: E712 Needed for the bitwise operator to combine comparisons
+            where = not_deleted if where is None else where & not_deleted
         query_string, params = query.relationship(
             self.source if self.filter_on_id else type(self.source),
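Why patch 13/13 restores `== False` and `&`: in the labelbox ORM, `deleted` is a schema field whose `==` is overloaded to build a query filter object, and the bitwise `&` combines two such filters into one. Python's `is` cannot be overloaded, so `deleted is False` evaluates to a plain bool and the filter is silently dropped, which is what patches 08 and 11 accidentally introduced. A minimal runnable sketch of the pattern follows; `Field` and `Comparison` here are simplified stand-ins, not the actual `labelbox.orm` classes:

class Comparison:
    """A deferred filter node, e.g. the result of `deleted == False` (sketch only)."""

    def __init__(self, lhs, op, rhs):
        self.lhs, self.op, self.rhs = lhs, op, rhs

    def __and__(self, other):
        # `&` expects Comparison operands on both sides, which is why the
        # `== False` comparison has to survive the E712 cleanup.
        return Comparison(self, "AND", other)

    def __repr__(self):
        return f"({self.lhs!r} {self.op} {self.rhs!r})"


class Field:
    """A schema field whose `==` builds a Comparison instead of returning a bool."""

    def __init__(self, name):
        self.name = name

    def __eq__(self, value):  # builds a filter node; never returns a bool
        return Comparison(self.name, "=", value)

    __hash__ = None  # __eq__ without __hash__ is deliberate in this sketch


deleted = Field("deleted")

not_deleted = deleted == False  # noqa: E712 -- builds a Comparison node
print(not_deleted & Comparison("project_id", "=", "p1"))
# (('deleted' = False) AND ('project_id' = 'p1'))

broken = deleted is False  # `is` is an identity test; this is just the bool False
print(broken)  # False -- the intended filter condition is silently lost

Ruff's E712 flags `== False` comparisons by default, hence the targeted `# noqa` on those two lines rather than an `is False` rewrite.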