From 7d823809908cd0ada47b9b1ffa92627b45949e9f Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 1 Oct 2024 22:32:46 -0500 Subject: [PATCH 01/18] Removed assign_feature_schema_ids --- libs/labelbox/src/labelbox/client.py | 4 - .../data/annotation_types/collection.py | 15 -- .../labelbox/data/annotation_types/label.py | 36 --- .../data/annotation_types/test_collection.py | 21 -- .../tests/data/annotation_types/test_label.py | 208 ------------------ 5 files changed, 284 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 4498531ce..611871dbb 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -1644,10 +1644,6 @@ def get_data_row_ids_for_global_keys( """ Gets data row ids for a list of global keys. - Deprecation Notice: This function will soon no longer return 'Deleted Data Rows' - as part of the 'results'. Global keys for deleted data rows will soon be placed - under 'Data Row not found' portion. - Args: A list of global keys Returns: diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index 2e76176a8..51bcce1b2 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -25,21 +25,6 @@ def __init__(self, data: Generator[Label, None, None], *args, **kwargs): self._fns = {} super().__init__(data, *args, **kwargs) - def assign_feature_schema_ids( - self, ontology_builder: "ontology.OntologyBuilder" - ) -> "LabelGenerator": - def _assign_ids(label: Label): - label.assign_feature_schema_ids(ontology_builder) - return label - - warnings.warn( - "This method is deprecated and will be " - "removed in a future release. Feature schema ids" - " are no longer required for importing." - ) - self._fns["assign_feature_schema_ids"] = _assign_ids - return self - def add_url_to_masks( self, signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 8ae05f898..2f835b23c 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -136,42 +136,6 @@ def create_data_row( self.data.external_id = data_row.external_id return self - def assign_feature_schema_ids( - self, ontology_builder: ontology.OntologyBuilder - ) -> "Label": - """ - Adds schema ids to all FeatureSchema objects in the Labels. - - Args: - ontology_builder: The ontology that matches the feature names assigned to objects in this dataset - Returns: - Label. useful for chaining these modifying functions - - Note: You can now import annotations using names directly without having to lookup schema_ids - """ - warnings.warn( - "This method is deprecated and will be " - "removed in a future release. Feature schema ids" - " are no longer required for importing." 
- ) - tool_lookup, classification_lookup = get_feature_schema_lookup( - ontology_builder - ) - for annotation in self.annotations: - if isinstance(annotation, ClassificationAnnotation): - self._assign_or_raise(annotation, classification_lookup) - self._assign_option(annotation, classification_lookup) - elif isinstance(annotation, ObjectAnnotation): - self._assign_or_raise(annotation, tool_lookup) - for classification in annotation.classifications: - self._assign_or_raise(classification, classification_lookup) - self._assign_option(classification, classification_lookup) - else: - raise TypeError( - f"Unexpected type found for annotation. {type(annotation)}" - ) - return self - def _assign_or_raise(self, annotation, lookup: Dict[str, str]) -> None: if annotation.feature_schema_id is not None: return diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index f818b94ff..e0fa7bd53 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -72,27 +72,6 @@ def test_conversion(list_of_labels): assert [x for x in label_collection] == list_of_labels -def test_adding_schema_ids(): - name = "line_feature" - label = Label( - data=GenericDataRowData(uid="123456"), - annotations=[ - ObjectAnnotation( - value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), - name=name, - ) - ], - ) - feature_schema_id = "expected_id" - ontology = OntologyBuilder( - tools=[ - Tool(Tool.Type.LINE, name=name, feature_schema_id=feature_schema_id) - ] - ) - generator = LabelGenerator([label]).assign_feature_schema_ids(ontology) - assert next(generator).annotations[0].feature_schema_id == feature_schema_id - - def test_adding_to_masks(signer): label = Label( data=GenericDataRowData(uid="12345"), diff --git a/libs/labelbox/tests/data/annotation_types/test_label.py b/libs/labelbox/tests/data/annotation_types/test_label.py index 8439837ed..9cd992b0c 100644 --- a/libs/labelbox/tests/data/annotation_types/test_label.py +++ b/libs/labelbox/tests/data/annotation_types/test_label.py @@ -23,214 +23,6 @@ import pytest -def test_schema_assignment_geometry(): - name = "line_feature" - label = Label( - data=MaskData( - arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" - ), - annotations=[ - ObjectAnnotation( - value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), - name=name, - ) - ], - ) - feature_schema_id = "expected_id" - ontology = OntologyBuilder( - tools=[ - Tool(Tool.Type.LINE, name=name, feature_schema_id=feature_schema_id) - ] - ) - label.assign_feature_schema_ids(ontology) - - assert label.annotations[0].feature_schema_id == feature_schema_id - - -def test_schema_assignment_classification(): - radio_name = "radio_name" - text_name = "text_name" - option_name = "my_option" - - label = Label( - data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), - annotations=[ - ClassificationAnnotation( - value=Radio(answer=ClassificationAnswer(name=option_name)), - name=radio_name, - ), - ClassificationAnnotation( - value=Text(answer="some text"), name=text_name - ), - ], - ) - radio_schema_id = "radio_schema_id" - text_schema_id = "text_schema_id" - option_schema_id = "option_schema_id" - ontology = OntologyBuilder( - tools=[], - classifications=[ - OClassification( - class_type=OClassification.Type.RADIO, - name=radio_name, - feature_schema_id=radio_schema_id, - options=[ - Option( - value=option_name, feature_schema_id=option_schema_id - ) - ], - 
), - OClassification( - class_type=OClassification.Type.TEXT, - name=text_name, - feature_schema_id=text_schema_id, - ), - ], - ) - label.assign_feature_schema_ids(ontology) - assert label.annotations[0].feature_schema_id == radio_schema_id - assert label.annotations[1].feature_schema_id == text_schema_id - assert ( - label.annotations[0].value.answer.feature_schema_id == option_schema_id - ) - - -def test_schema_assignment_subclass(): - name = "line_feature" - radio_name = "radio_name" - option_name = "my_option" - classification = ClassificationAnnotation( - name=radio_name, - value=Radio(answer=ClassificationAnswer(name=option_name)), - ) - label = Label( - data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), - annotations=[ - ObjectAnnotation( - value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), - name=name, - classifications=[classification], - ) - ], - ) - feature_schema_id = "expected_id" - classification_schema_id = "classification_id" - option_schema_id = "option_schema_id" - ontology = OntologyBuilder( - tools=[ - Tool( - Tool.Type.LINE, - name=name, - feature_schema_id=feature_schema_id, - classifications=[ - OClassification( - class_type=OClassification.Type.RADIO, - name=radio_name, - feature_schema_id=classification_schema_id, - options=[ - Option( - value=option_name, - feature_schema_id=option_schema_id, - ) - ], - ) - ], - ) - ] - ) - label.assign_feature_schema_ids(ontology) - assert label.annotations[0].feature_schema_id == feature_schema_id - assert ( - label.annotations[0].classifications[0].feature_schema_id - == classification_schema_id - ) - assert ( - label.annotations[0].classifications[0].value.answer.feature_schema_id - == option_schema_id - ) - - -def test_highly_nested(): - name = "line_feature" - radio_name = "radio_name" - nested_name = "nested_name" - option_name = "my_option" - nested_option_name = "nested_option_name" - classification = ClassificationAnnotation( - name=radio_name, - value=Radio(answer=ClassificationAnswer(name=option_name)), - classifications=[ - ClassificationAnnotation( - value=Radio( - answer=ClassificationAnswer(name=nested_option_name) - ), - name=nested_name, - ) - ], - ) - label = Label( - data=MaskData( - arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" - ), - annotations=[ - ObjectAnnotation( - value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), - name=name, - classifications=[classification], - ) - ], - ) - feature_schema_id = "expected_id" - classification_schema_id = "classification_id" - nested_classification_schema_id = "nested_classification_schema_id" - option_schema_id = "option_schema_id" - ontology = OntologyBuilder( - tools=[ - Tool( - Tool.Type.LINE, - name=name, - feature_schema_id=feature_schema_id, - classifications=[ - OClassification( - class_type=OClassification.Type.RADIO, - name=radio_name, - feature_schema_id=classification_schema_id, - options=[ - Option( - value=option_name, - feature_schema_id=option_schema_id, - options=[ - OClassification( - class_type=OClassification.Type.RADIO, - name=nested_name, - feature_schema_id=nested_classification_schema_id, - options=[ - Option( - value=nested_option_name, - feature_schema_id=nested_classification_schema_id, - ) - ], - ) - ], - ) - ], - ) - ], - ) - ] - ) - label.assign_feature_schema_ids(ontology) - assert label.annotations[0].feature_schema_id == feature_schema_id - assert ( - label.annotations[0].classifications[0].feature_schema_id - == classification_schema_id - ) - assert ( - 
label.annotations[0].classifications[0].value.answer.feature_schema_id - == option_schema_id - ) - - def test_schema_assignment_confidence(): name = "line_feature" label = Label( From 9dd54c77441a45a194f93506d123e9317547f056 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 1 Oct 2024 22:49:20 -0500 Subject: [PATCH 02/18] removed rest of deprecated --- .../src/labelbox/schema/asset_attachment.py | 11 +-- .../src/labelbox/schema/data_row_metadata.py | 24 +---- libs/labelbox/src/labelbox/schema/dataset.py | 38 +------ .../src/labelbox/schema/queue_mode.py | 6 -- libs/labelbox/src/labelbox/schema/slice.py | 37 ------- .../tests/integration/test_data_rows.py | 98 +------------------ libs/labelbox/tests/unit/test_queue_mode.py | 4 - 7 files changed, 9 insertions(+), 209 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/asset_attachment.py b/libs/labelbox/src/labelbox/schema/asset_attachment.py index 0d5598c84..9a56dbb72 100644 --- a/libs/labelbox/src/labelbox/schema/asset_attachment.py +++ b/libs/labelbox/src/labelbox/schema/asset_attachment.py @@ -7,15 +7,6 @@ class AttachmentType(str, Enum): - @classmethod - def __missing__(cls, value: object): - if str(value) == "TEXT": - warnings.warn( - "The TEXT attachment type is deprecated. Use RAW_TEXT instead." - ) - return cls.RAW_TEXT - return value - VIDEO = "VIDEO" IMAGE = "IMAGE" IMAGE_OVERLAY = "IMAGE_OVERLAY" @@ -30,7 +21,7 @@ class AssetAttachment(DbObject): """Asset attachment provides extra context about an asset while labeling. Attributes: - attachment_type (str): IMAGE, VIDEO, IMAGE_OVERLAY, HTML, RAW_TEXT, TEXT_URL, or PDF_URL. TEXT attachment type is deprecated. + attachment_type (str): IMAGE, VIDEO, IMAGE_OVERLAY, HTML, RAW_TEXT, TEXT_URL, or PDF_URL. attachment_value (str): URL to an external file or a string of text attachment_name (str): The name of the attachment """ diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py index 2fd90e2f9..8bed7d6f5 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py +++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py @@ -1,5 +1,4 @@ # type: ignore -import warnings from copy import deepcopy from datetime import datetime from enum import Enum @@ -673,29 +672,14 @@ def bulk_delete( if not len(deletes): raise ValueError("The 'deletes' list cannot be empty.") - passed_strings = False - for i, delete in enumerate(deletes): - if isinstance(delete.data_row_id, str): - passed_strings = True - deletes[i] = DeleteDataRowMetadata( - data_row_id=UniqueId(delete.data_row_id), - fields=delete.fields, - ) - elif isinstance(delete.data_row_id, UniqueId): - continue - elif isinstance(delete.data_row_id, GlobalKey): - continue - else: + for delete in enumerate(deletes): + if not isinstance(delete.data_row_id, UniqueId) or not isinstance( + delete.data_row_id, GlobalKey + ): raise ValueError( f"Invalid data row identifier type '{type(delete.data_row_id)}' for '{delete.data_row_id}'" ) - if passed_strings: - warnings.warn( - "Using string for data row id will be deprecated. Please use " - "UniqueId instead." 
- ) - def _batch_delete( deletes: List[_DeleteBatchDataRowMetadata], ) -> List[DataRowMetadataBatchResponse]: diff --git a/libs/labelbox/src/labelbox/schema/dataset.py b/libs/labelbox/src/labelbox/schema/dataset.py index 6d879e767..ac709a60b 100644 --- a/libs/labelbox/src/labelbox/schema/dataset.py +++ b/libs/labelbox/src/labelbox/schema/dataset.py @@ -166,47 +166,13 @@ def create_data_row(self, items=None, **kwargs) -> "DataRow": return self.client.get_data_row(res[0]["id"]) - def create_data_rows_sync( - self, items, file_upload_thread_count=FILE_UPLOAD_THREAD_COUNT - ) -> None: - """Synchronously bulk upload data rows. - - Use this instead of `Dataset.create_data_rows` for smaller batches of data rows that need to be uploaded quickly. - Cannot use this for uploads containing more than 1000 data rows. - Each data row is also limited to 5 attachments. - - Args: - items (iterable of (dict or str)): - See the docstring for `Dataset._create_descriptor_file` for more information. - Returns: - None. If the function doesn't raise an exception then the import was successful. - - Raises: - ResourceCreationError: If the `items` parameter does not conform to - the specification in Dataset._create_descriptor_file or if the server did not accept the - DataRow creation request (unknown reason). - InvalidAttributeError: If there are fields in `items` not valid for - a DataRow. - ValueError: When the upload parameters are invalid - """ - warnings.warn( - "This method is deprecated and will be " - "removed in a future release. Please use create_data_rows instead." - ) - - self._create_data_rows_sync( - items, file_upload_thread_count=file_upload_thread_count - ) - - return None # Return None if no exception is raised - def _create_data_rows_sync( self, items, file_upload_thread_count=FILE_UPLOAD_THREAD_COUNT ) -> "DataUpsertTask": max_data_rows_supported = 1000 if len(items) > max_data_rows_supported: raise ValueError( - f"Dataset.create_data_rows_sync() supports a max of {max_data_rows_supported} data rows." + f"Dataset._create_data_rows_sync() supports a max of {max_data_rows_supported} data rows." " For larger imports use the async function Dataset.create_data_rows()" ) if file_upload_thread_count < 1: @@ -235,8 +201,6 @@ def create_data_rows( ) -> "DataUpsertTask": """Asynchronously bulk upload data rows - Use this instead of `Dataset.create_data_rows_sync` uploads for batches that contain more than 1000 data rows. - Args: items (iterable of (dict or str)) diff --git a/libs/labelbox/src/labelbox/schema/queue_mode.py b/libs/labelbox/src/labelbox/schema/queue_mode.py index 333e92987..fc1e850d9 100644 --- a/libs/labelbox/src/labelbox/schema/queue_mode.py +++ b/libs/labelbox/src/labelbox/schema/queue_mode.py @@ -4,9 +4,3 @@ class QueueMode(str, Enum): Batch = "BATCH" Dataset = "DATA_SET" - - @classmethod - def _missing_(cls, value): - # Parses the deprecated "CATALOG" value back to QueueMode.Batch. - if value == "CATALOG": - return QueueMode.Batch diff --git a/libs/labelbox/src/labelbox/schema/slice.py b/libs/labelbox/src/labelbox/schema/slice.py index 624731024..9a0ae912e 100644 --- a/libs/labelbox/src/labelbox/schema/slice.py +++ b/libs/labelbox/src/labelbox/schema/slice.py @@ -53,43 +53,6 @@ class CatalogSlice(Slice): Represents a Slice used for filtering data rows in Catalog. 
""" - def get_data_row_ids(self) -> PaginatedCollection: - """ - Fetches all data row ids that match this Slice - - Returns: - A PaginatedCollection of mapping of data row ids to global keys - """ - - warnings.warn( - "get_data_row_ids will be deprecated. Use get_data_row_identifiers instead" - ) - - query_str = """ - query getDataRowIdsBySavedQueryPyApi($id: ID!, $from: String, $first: Int!) { - getDataRowIdsBySavedQuery(input: { - savedQueryId: $id, - after: $from - first: $first - }) { - totalCount - nodes - pageInfo { - endCursor - hasNextPage - } - } - } - """ - return PaginatedCollection( - client=self.client, - query=query_str, - params={"id": str(self.uid)}, - dereferencing=["getDataRowIdsBySavedQuery", "nodes"], - obj_class=lambda _, data_row_id: data_row_id, - cursor_path=["getDataRowIdsBySavedQuery", "pageInfo", "endCursor"], - ) - def get_data_row_identifiers(self) -> PaginatedCollection: """ Fetches all data row ids and global keys (where defined) that match this Slice diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index 7d777a28a..f68f1059e 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -500,8 +500,6 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): [ ("create_data_rows", "class"), ("create_data_rows", "dict"), - ("create_data_rows_sync", "class"), - ("create_data_rows_sync", "dict"), ("create_data_row", "class"), ("create_data_row", "dict"), ], @@ -539,7 +537,6 @@ def create_data_row(data_rows): CREATION_FUNCTION = { "create_data_rows": dataset.create_data_rows, - "create_data_rows_sync": dataset.create_data_rows_sync, "create_data_row": create_data_row, } data_rows = [METADATA_FIELDS[metadata_obj_type]] @@ -804,49 +801,6 @@ def test_data_row_attachments(dataset, image_url): ) -def test_create_data_rows_sync_attachments(dataset, image_url): - attachments = [ - ("IMAGE", image_url, "image URL"), - ("RAW_TEXT", "test-text", None), - ("IMAGE_OVERLAY", image_url, "Overlay"), - ("HTML", image_url, None), - ] - attachments_per_data_row = 3 - dataset.create_data_rows_sync( - [ - { - "row_data": image_url, - "external_id": "test-id", - "attachments": [ - { - "type": attachment_type, - "value": attachment_value, - "name": attachment_name, - } - for _ in range(attachments_per_data_row) - ], - } - for attachment_type, attachment_value, attachment_name in attachments - ] - ) - data_rows = list(dataset.data_rows()) - assert len(data_rows) == len(attachments) - for data_row in data_rows: - assert len(list(data_row.attachments())) == attachments_per_data_row - - -def test_create_data_rows_sync_mixed_upload(dataset, image_url): - n_local = 100 - n_urls = 100 - with NamedTemporaryFile() as fp: - fp.write("Test data".encode()) - fp.flush() - dataset.create_data_rows_sync( - [{DataRow.row_data: image_url}] * n_urls + [fp.name] * n_local - ) - assert len(list(dataset.data_rows())) == n_local + n_urls - - def test_create_data_row_attachment(data_row): att = data_row.create_attachment( "IMAGE", "https://example.com/image.jpg", "name" @@ -1086,53 +1040,6 @@ def test_data_row_delete_and_create_with_same_global_key( assert task.result[0]["global_key"] == global_key_1 -def test_data_row_bulk_creation_sync_with_unique_global_keys( - dataset, sample_image -): - global_key_1 = str(uuid.uuid4()) - global_key_2 = str(uuid.uuid4()) - global_key_3 = str(uuid.uuid4()) - - dataset.create_data_rows_sync( - [ - {DataRow.row_data: sample_image, 
DataRow.global_key: global_key_1},
-            {DataRow.row_data: sample_image, DataRow.global_key: global_key_2},
-            {DataRow.row_data: sample_image, DataRow.global_key: global_key_3},
-        ]
-    )
-
-    assert {row.global_key for row in dataset.data_rows()} == {
-        global_key_1,
-        global_key_2,
-        global_key_3,
-    }
-
-
-def test_data_row_bulk_creation_sync_with_same_global_keys(
-    dataset, sample_image
-):
-    global_key_1 = str(uuid.uuid4())
-
-    with pytest.raises(ResourceCreationError) as exc_info:
-        dataset.create_data_rows_sync(
-            [
-                {
-                    DataRow.row_data: sample_image,
-                    DataRow.global_key: global_key_1,
-                },
-                {
-                    DataRow.row_data: sample_image,
-                    DataRow.global_key: global_key_1,
-                },
-            ]
-        )
-
-    assert len(list(dataset.data_rows())) == 1
-    assert list(dataset.data_rows())[0].global_key == global_key_1
-    assert "Duplicate global key" in str(exc_info.value)
-    assert exc_info.value.args[1]  # task id
-
-
 @pytest.fixture
 def conversational_data_rows(dataset, conversational_content):
     examples = [
@@ -1174,7 +1081,7 @@ def test_invalid_media_type(dataset, conversational_content):
     # TODO: What error kind should this be? It looks like for global key we are
     # using malformed query. But for invalid contents in FileUploads we use InvalidQueryError
     with pytest.raises(ResourceCreationError):
-        dataset.create_data_rows_sync(
+        dataset.(
             [{**conversational_content, "media_type": "IMAGE"}]
         )
 
@@ -1184,7 +1091,8 @@ def test_create_tiled_layer(dataset, tile_content):
         {**tile_content, "media_type": "TMS_GEO"},
         tile_content,
     ]
-    dataset.create_data_rows_sync(examples)
+    task = dataset.create_data_rows(examples)
+    task.wait_until_done()
     data_rows = list(dataset.data_rows())
     assert len(data_rows) == len(examples)
     for data_row in data_rows:
diff --git a/libs/labelbox/tests/unit/test_queue_mode.py b/libs/labelbox/tests/unit/test_queue_mode.py
index a07b14a54..0711b13af 100644
--- a/libs/labelbox/tests/unit/test_queue_mode.py
+++ b/libs/labelbox/tests/unit/test_queue_mode.py
@@ -3,10 +3,6 @@
 from labelbox.schema.queue_mode import QueueMode
 
 
-def test_parse_deprecated_catalog():
-    assert QueueMode("CATALOG") == QueueMode.Batch
-
-
 def test_parse_batch():
     assert QueueMode("BATCH") == QueueMode.Batch
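
Migration note for the create_data_rows_sync removal above: the async
Dataset.create_data_rows path, already used by the updated tests in this
patch, is the supported replacement. A minimal sketch, assuming an existing
`dataset` and an `items` list of data row dicts; the `task.errors` check is
an assumption about the returned task's interface, not something shown in
this diff:

    # Before (removed here; the sync path was capped at 1000 data rows):
    # dataset.create_data_rows_sync(items)

    # After: asynchronous bulk upload that returns a task object.
    task = dataset.create_data_rows(items)
    task.wait_until_done()  # block until the upload finishes
    if task.errors:         # assumed attribute; verify against your SDK version
        raise RuntimeError(task.errors)
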
From d088d6349d3764b2862b22c49c58c31a9fba9f50 Mon Sep 17 00:00:00 2001
From: Gabefire <33893811+Gabefire@users.noreply.github.com>
Date: Wed, 2 Oct 2024 13:39:46 -0500
Subject: [PATCH 05/18] Fixed bad tests

---
 .../src/labelbox/schema/data_row_metadata.py  | 24 ++++-----------
 .../test_data_row_delete_metadata.py          | 30 ++++---------------
 .../tests/integration/test_data_rows.py       |  2 +-
 3 files changed, 12 insertions(+), 44 deletions(-)

diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py
index 8bed7d6f5..386d5939b 100644
--- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py
+++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py
@@ -14,6 +14,7 @@
     Type,
     Union,
     overload,
+    get_args,
 )
 
 from pydantic import (
@@ -27,7 +28,7 @@
 )
 from typing_extensions import Annotated
 
-from labelbox.schema.identifiable import GlobalKey, UniqueId
+from labelbox.schema.identifiable import GlobalKey, UniqueId, DataRowIdentifier
 from labelbox.schema.identifiables import DataRowIdentifiers, UniqueIds
 from labelbox.schema.ontology import SchemaId
 from labelbox.utils import (
@@ -87,7 +88,7 @@ class DataRowMetadata(_CamelCaseMixin):
 
 
 class DeleteDataRowMetadata(_CamelCaseMixin):
-    data_row_id: Union[str, UniqueId, GlobalKey] = None
+    data_row_id: Union[UniqueId, GlobalKey] = None
     fields: List[SchemaId]
 
 
@@ -646,21 +647,10 @@ def bulk_delete(
         >>> )
         >>> mdo.batch_delete([metadata])
 
-        >>> delete = DeleteDataRowMetadata(
-        >>>     data_row_id="global-key",
-        >>>     fields=[
-        >>>         "schema-id-1",
-        >>>         "schema-id-2"
-        >>>         ...
-        >>>     ]
-        >>> )
-        >>> mdo.batch_delete([metadata])
-
         Args:
             deletes: Data row and schema ids to delete
-                For data row, we support UniqueId, str, and GlobalKey.
- If you pass a str, we will assume it is a UniqueId + For data row, we support UniqueId and GlobalKey. Do not pass a mix of data row ids and global keys in the same list Returns: @@ -672,10 +662,8 @@ def bulk_delete( if not len(deletes): raise ValueError("The 'deletes' list cannot be empty.") - for delete in enumerate(deletes): - if not isinstance(delete.data_row_id, UniqueId) or not isinstance( - delete.data_row_id, GlobalKey - ): + for delete in deletes: + if not isinstance(delete.data_row_id, get_args(DataRowIdentifier)): raise ValueError( f"Invalid data row identifier type '{type(delete.data_row_id)}' for '{delete.data_row_id}'" ) diff --git a/libs/labelbox/tests/integration/test_data_row_delete_metadata.py b/libs/labelbox/tests/integration/test_data_row_delete_metadata.py index a2ffd31ba..ad9c9e1ee 100644 --- a/libs/labelbox/tests/integration/test_data_row_delete_metadata.py +++ b/libs/labelbox/tests/integration/test_data_row_delete_metadata.py @@ -121,14 +121,9 @@ def data_row_global_key(data_row): return GlobalKey(data_row.global_key) -@pytest.fixture -def data_row_id_as_str(data_row): - return data_row.uid - - @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], + ["data_row_unique_id", "data_row_global_key"], ) def test_bulk_delete_datarow_metadata( data_row_for_delete, data_row, mdo, request @@ -154,7 +149,7 @@ def test_bulk_delete_datarow_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], + ["data_row_unique_id", "data_row_global_key"], ) def test_bulk_partial_delete_datarow_metadata( data_row_for_delete, data_row, mdo, request @@ -195,21 +190,6 @@ def data_row_unique_ids(big_dataset): return deletes -@pytest.fixture -def data_row_ids_as_str(big_dataset): - deletes = [] - data_row_ids = [dr.uid for dr in big_dataset.data_rows()] - - for data_row_id in data_row_ids: - deletes.append( - DeleteDataRowMetadata( - data_row_id=data_row_id, - fields=[SPLIT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID], - ) - ) - return deletes - - @pytest.fixture def data_row_global_keys(big_dataset): deletes = [] @@ -227,7 +207,7 @@ def data_row_global_keys(big_dataset): @pytest.mark.parametrize( "data_rows_for_delete", - ["data_row_ids_as_str", "data_row_unique_ids", "data_row_global_keys"], + ["data_row_unique_ids", "data_row_global_keys"], ) def test_large_bulk_delete_datarow_metadata( data_rows_for_delete, big_dataset, mdo, request @@ -267,7 +247,7 @@ def test_large_bulk_delete_datarow_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], + ["data_row_unique_id", "data_row_global_key"], ) def test_bulk_delete_datarow_enum_metadata( data_row_for_delete, @@ -304,7 +284,7 @@ def test_bulk_delete_datarow_enum_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], + ["data_row_unique_id", "data_row_global_key"], ) def test_delete_non_existent_schema_id( data_row_for_delete, data_row, mdo, request diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index f68f1059e..481385e75 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -1081,7 +1081,7 @@ def test_invalid_media_type(dataset, conversational_content): # TODO: What error kind should this be? 
It looks like for global key we are # using malformed query. But for invalid contents in FileUploads we use InvalidQueryError with pytest.raises(ResourceCreationError): - dataset.( + dataset._create_data_rows_sync( [{**conversational_content, "media_type": "IMAGE"}] ) From 4d6f90d4cdad254e0dbc680c582ad0fb70821d54 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 2 Oct 2024 13:52:22 -0500 Subject: [PATCH 06/18] Revert tests --- .../src/labelbox/schema/data_row_metadata.py | 32 +++++++++++++++---- .../test_data_row_delete_metadata.py | 30 ++++++++++++++--- .../tests/integration/test_data_rows.py | 1 - 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py index 386d5939b..cb45ef57f 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py +++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py @@ -1,4 +1,5 @@ # type: ignore +import warnings from copy import deepcopy from datetime import datetime from enum import Enum @@ -14,7 +15,6 @@ Type, Union, overload, - get_args, ) from pydantic import ( @@ -28,7 +28,7 @@ ) from typing_extensions import Annotated -from labelbox.schema.identifiable import GlobalKey, UniqueId, DataRowIdentifier +from labelbox.schema.identifiable import GlobalKey, UniqueId from labelbox.schema.identifiables import DataRowIdentifiers, UniqueIds from labelbox.schema.ontology import SchemaId from labelbox.utils import ( @@ -88,7 +88,7 @@ class DataRowMetadata(_CamelCaseMixin): class DeleteDataRowMetadata(_CamelCaseMixin): - data_row_id: Union[UniqueId, GlobalKey] = None + data_row_id: Union[str, UniqueId, GlobalKey] = None fields: List[SchemaId] @@ -647,10 +647,21 @@ def bulk_delete( >>> ) >>> mdo.batch_delete([metadata]) + >>> delete = DeleteDataRowMetadata( + >>> data_row_id="global-key", + >>> fields=[ + >>> "schema-id-1", + >>> "schema-id-2" + >>> ... + >>> ] + >>> ) + >>> mdo.batch_delete([metadata]) + Args: deletes: Data row and schema ids to delete - For data row, we support UniqueId and GlobalKey. + For data row, we support UniqueId, str, and GlobalKey. 
+ If you pass a str, we will assume it is a UniqueId Do not pass a mix of data row ids and global keys in the same list Returns: @@ -662,8 +673,17 @@ def bulk_delete( if not len(deletes): raise ValueError("The 'deletes' list cannot be empty.") - for delete in deletes: - if not isinstance(delete.data_row_id, get_args(DataRowIdentifier)): + for i, delete in enumerate(deletes): + if isinstance(delete.data_row_id, str): + deletes[i] = DeleteDataRowMetadata( + data_row_id=UniqueId(delete.data_row_id), + fields=delete.fields, + ) + elif isinstance(delete.data_row_id, UniqueId): + continue + elif isinstance(delete.data_row_id, GlobalKey): + continue + else: raise ValueError( f"Invalid data row identifier type '{type(delete.data_row_id)}' for '{delete.data_row_id}'" ) diff --git a/libs/labelbox/tests/integration/test_data_row_delete_metadata.py b/libs/labelbox/tests/integration/test_data_row_delete_metadata.py index ad9c9e1ee..a2ffd31ba 100644 --- a/libs/labelbox/tests/integration/test_data_row_delete_metadata.py +++ b/libs/labelbox/tests/integration/test_data_row_delete_metadata.py @@ -121,9 +121,14 @@ def data_row_global_key(data_row): return GlobalKey(data_row.global_key) +@pytest.fixture +def data_row_id_as_str(data_row): + return data_row.uid + + @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_unique_id", "data_row_global_key"], + ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], ) def test_bulk_delete_datarow_metadata( data_row_for_delete, data_row, mdo, request @@ -149,7 +154,7 @@ def test_bulk_delete_datarow_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_unique_id", "data_row_global_key"], + ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], ) def test_bulk_partial_delete_datarow_metadata( data_row_for_delete, data_row, mdo, request @@ -190,6 +195,21 @@ def data_row_unique_ids(big_dataset): return deletes +@pytest.fixture +def data_row_ids_as_str(big_dataset): + deletes = [] + data_row_ids = [dr.uid for dr in big_dataset.data_rows()] + + for data_row_id in data_row_ids: + deletes.append( + DeleteDataRowMetadata( + data_row_id=data_row_id, + fields=[SPLIT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID], + ) + ) + return deletes + + @pytest.fixture def data_row_global_keys(big_dataset): deletes = [] @@ -207,7 +227,7 @@ def data_row_global_keys(big_dataset): @pytest.mark.parametrize( "data_rows_for_delete", - ["data_row_unique_ids", "data_row_global_keys"], + ["data_row_ids_as_str", "data_row_unique_ids", "data_row_global_keys"], ) def test_large_bulk_delete_datarow_metadata( data_rows_for_delete, big_dataset, mdo, request @@ -247,7 +267,7 @@ def test_large_bulk_delete_datarow_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_unique_id", "data_row_global_key"], + ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], ) def test_bulk_delete_datarow_enum_metadata( data_row_for_delete, @@ -284,7 +304,7 @@ def test_bulk_delete_datarow_enum_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_unique_id", "data_row_global_key"], + ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], ) def test_delete_non_existent_schema_id( data_row_for_delete, data_row, mdo, request diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index ffc51329c..ea66a6073 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -1081,7 +1081,6 @@ def 
test_invalid_media_type(dataset, conversational_content): # TODO: What error kind should this be? It looks like for global key we are # using malformed query. But for invalid contents in FileUploads we use InvalidQueryError with pytest.raises(ResourceCreationError): - dataset.( dataset._create_data_rows_sync( [{**conversational_content, "media_type": "IMAGE"}] ) From 4534e5cf2a24f179c17ec41655bea5c558b92cb1 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 2 Oct 2024 15:30:57 -0500 Subject: [PATCH 07/18] Remove queue mode as something to set --- libs/labelbox/src/labelbox/__init__.py | 1 - libs/labelbox/src/labelbox/client.py | 10 ++-------- libs/labelbox/src/labelbox/project_validation.py | 4 +--- libs/labelbox/src/labelbox/schema/project.py | 12 ------------ libs/labelbox/src/labelbox/schema/queue_mode.py | 6 ------ libs/labelbox/src/labelbox/schema/user_group.py | 2 -- .../tests/unit/schema/test_user_group.py | 2 -- libs/labelbox/tests/unit/test_project.py | 2 -- libs/labelbox/tests/unit/test_queue_mode.py | 16 ---------------- 9 files changed, 3 insertions(+), 52 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/schema/queue_mode.py delete mode 100644 libs/labelbox/tests/unit/test_queue_mode.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 7f7081947..850aec0be 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -59,7 +59,6 @@ from labelbox.schema.project_resource_tag import ProjectResourceTag from labelbox.schema.media_type import MediaType from labelbox.schema.slice import Slice, CatalogSlice, ModelSlice -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.task_queue import TaskQueue from labelbox.schema.label_score import LabelScore from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 611871dbb..dc0567d7e 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -72,7 +72,6 @@ CONSENSUS_AUTO_AUDIT_PERCENTAGE, QualityMode, ) -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.role import Role from labelbox.schema.search_filters import SearchFilter from labelbox.schema.send_to_annotate_params import ( @@ -465,16 +464,16 @@ def _create(self, db_object_type, data, extra_params={}): data = {**data, **extra_params} query_string, params = query.create(db_object_type, data) + print(query_string) res = self.execute( query_string, params, raise_return_resource_not_found=True ) - if not res: raise LabelboxError( "Failed to create %s" % db_object_type.type_name() ) res = res["create%s" % db_object_type.type_name()] - + print(res) return db_object_type(self, res) def create_model_config( @@ -621,7 +620,6 @@ def create_project( name (str): A name for the project description (str): A short summary for the project media_type (MediaType): The type of assets that this project will accept - queue_mode (Optional[QueueMode]): The queue mode to use quality_modes (Optional[List[QualityMode]]): The quality modes to use (e.g. Benchmark, Consensus). Defaults to Benchmark. is_benchmark_enabled (Optional[bool]): Whether the project supports benchmark. Defaults to None. 
@@ -853,11 +851,7 @@ def create_response_creation_project( return self._create_project(_CoreProjectInput(**input)) def _create_project(self, input: _CoreProjectInput) -> Project: - media_type_value = input.media_type.value - params = input.model_dump(exclude_none=True) - if media_type_value: - params["media_type"] = media_type_value extra_params = { Field.String("dataset_name_or_id"): params.pop( diff --git a/libs/labelbox/src/labelbox/project_validation.py b/libs/labelbox/src/labelbox/project_validation.py index 8940dd161..41f1fa762 100644 --- a/libs/labelbox/src/labelbox/project_validation.py +++ b/libs/labelbox/src/labelbox/project_validation.py @@ -11,7 +11,6 @@ CONSENSUS_AUTO_AUDIT_PERCENTAGE, QualityMode, ) -from labelbox.schema.queue_mode import QueueMode PositiveInt = Annotated[int, Field(gt=0)] @@ -20,7 +19,6 @@ class _CoreProjectInput(BaseModel): name: str description: Optional[str] = None media_type: MediaType - queue_mode: QueueMode = Field(default=QueueMode.Batch, frozen=True) auto_audit_percentage: Optional[float] = None auto_audit_number_of_labels: Optional[int] = None quality_modes: Optional[Set[QualityMode]] = Field( @@ -33,7 +31,7 @@ class _CoreProjectInput(BaseModel): data_row_count: Optional[PositiveInt] = None editor_task_type: Optional[EditorTaskType] = None - model_config = ConfigDict(extra="forbid") + model_config = ConfigDict(extra="forbid", use_enum_values=True) @model_validator(mode="after") def validate_fields(self): diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 0daf3af10..2205a05e2 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -61,7 +61,6 @@ ProjectOverview, ProjectOverviewDetailed, ) -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.resource_tag import ResourceTag from labelbox.schema.task import Task from labelbox.schema.task_queue import TaskQueue @@ -109,7 +108,6 @@ class Project(DbObject, Updateable, Deletable): created_at (datetime) setup_complete (datetime) last_activity_time (datetime) - queue_mode (string) auto_audit_number_of_labels (int) auto_audit_percentage (float) is_benchmark_enabled (bool) @@ -132,7 +130,6 @@ class Project(DbObject, Updateable, Deletable): created_at = Field.DateTime("created_at") setup_complete = Field.DateTime("setup_complete") last_activity_time = Field.DateTime("last_activity_time") - queue_mode = Field.Enum(QueueMode, "queue_mode") auto_audit_number_of_labels = Field.Int("auto_audit_number_of_labels") auto_audit_percentage = Field.Float("auto_audit_percentage") # Bind data_type and allowedMediaTYpe using the GraphQL type MediaType @@ -734,9 +731,6 @@ def create_batch( Raises: lbox.exceptions.ValueError if a project is not batch mode, if the project is auto data generation, if the batch exceeds 100k data rows """ - # @TODO: make this automatic? 
- if self.queue_mode != QueueMode.Batch: - raise ValueError("Project must be in batch mode") if self.is_auto_data_generation(): raise ValueError( @@ -816,9 +810,6 @@ def create_batches( Returns: a task for the created batches """ - if self.queue_mode != QueueMode.Batch: - raise ValueError("Project must be in batch mode") - dr_ids = [] if data_rows is not None: for dr in data_rows: @@ -901,9 +892,6 @@ def create_batches_from_dataset( Returns: a task for the created batches """ - if self.queue_mode != QueueMode.Batch: - raise ValueError("Project must be in batch mode") - if consensus_settings: consensus_settings = ConsensusSettings( **consensus_settings diff --git a/libs/labelbox/src/labelbox/schema/queue_mode.py b/libs/labelbox/src/labelbox/schema/queue_mode.py deleted file mode 100644 index fc1e850d9..000000000 --- a/libs/labelbox/src/labelbox/schema/queue_mode.py +++ /dev/null @@ -1,6 +0,0 @@ -from enum import Enum - - -class QueueMode(str, Enum): - Batch = "BATCH" - Dataset = "DATA_SET" diff --git a/libs/labelbox/src/labelbox/schema/user_group.py b/libs/labelbox/src/labelbox/schema/user_group.py index 2dd9f76ca..2e93b4376 100644 --- a/libs/labelbox/src/labelbox/schema/user_group.py +++ b/libs/labelbox/src/labelbox/schema/user_group.py @@ -14,7 +14,6 @@ from labelbox.schema.media_type import MediaType from labelbox.schema.ontology_kind import EditorTaskType from labelbox.schema.project import Project -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.user import User @@ -411,7 +410,6 @@ def _get_projects_set(self, project_nodes): project_values = defaultdict(lambda: None) project_values["id"] = project["id"] project_values["name"] = project["name"] - project_values["queueMode"] = QueueMode.Batch.value project_values["editorTaskType"] = EditorTaskType.Missing.value project_values["mediaType"] = MediaType.Image.value projects.add(Project(self.client, project_values)) diff --git a/libs/labelbox/tests/unit/schema/test_user_group.py b/libs/labelbox/tests/unit/schema/test_user_group.py index 1df555a64..6bc29048d 100644 --- a/libs/labelbox/tests/unit/schema/test_user_group.py +++ b/libs/labelbox/tests/unit/schema/test_user_group.py @@ -14,7 +14,6 @@ from labelbox.schema.media_type import MediaType from labelbox.schema.ontology_kind import EditorTaskType from labelbox.schema.project import Project -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.user import User from labelbox.schema.user_group import UserGroup, UserGroupColor @@ -32,7 +31,6 @@ def group_project(): project_values = defaultdict(lambda: None) project_values["id"] = "project_id" project_values["name"] = "Test Project" - project_values["queueMode"] = QueueMode.Batch.value project_values["editorTaskType"] = EditorTaskType.Missing.value project_values["mediaType"] = MediaType.Image.value return Project(MagicMock(Client), project_values) diff --git a/libs/labelbox/tests/unit/test_project.py b/libs/labelbox/tests/unit/test_project.py index a8fd87b48..1bc6fa840 100644 --- a/libs/labelbox/tests/unit/test_project.py +++ b/libs/labelbox/tests/unit/test_project.py @@ -21,7 +21,6 @@ def project_entity(): "editorTaskType": "MODEL_CHAT_EVALUATION", "lastActivityTime": "2021-06-01T00:00:00.000Z", "allowedMediaType": "IMAGE", - "queueMode": "BATCH", "setupComplete": "2021-06-01T00:00:00.000Z", "modelSetupComplete": None, "uploadType": "Auto", @@ -62,7 +61,6 @@ def test_project_editor_task_type( "editorTaskType": api_editor_task_type, "lastActivityTime": "2021-06-01T00:00:00.000Z", "allowedMediaType": 
"IMAGE", - "queueMode": "BATCH", "setupComplete": "2021-06-01T00:00:00.000Z", "modelSetupComplete": None, "uploadType": "Auto", diff --git a/libs/labelbox/tests/unit/test_queue_mode.py b/libs/labelbox/tests/unit/test_queue_mode.py deleted file mode 100644 index 0711b13af..000000000 --- a/libs/labelbox/tests/unit/test_queue_mode.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -from labelbox.schema.queue_mode import QueueMode - - -def test_parse_batch(): - assert QueueMode("BATCH") == QueueMode.Batch - - -def test_parse_data_set(): - assert QueueMode("DATA_SET") == QueueMode.Dataset - - -def test_fails_for_unknown(): - with pytest.raises(ValueError): - QueueMode("foo") From dc4208faf11e1eabb0caf84d2d405f01ba989b6b Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 2 Oct 2024 15:32:03 -0500 Subject: [PATCH 08/18] Remove print --- libs/labelbox/src/labelbox/client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index dc0567d7e..aa08ab0b3 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -464,7 +464,6 @@ def _create(self, db_object_type, data, extra_params={}): data = {**data, **extra_params} query_string, params = query.create(db_object_type, data) - print(query_string) res = self.execute( query_string, params, raise_return_resource_not_found=True ) @@ -473,7 +472,6 @@ def _create(self, db_object_type, data, extra_params={}): "Failed to create %s" % db_object_type.type_name() ) res = res["create%s" % db_object_type.type_name()] - print(res) return db_object_type(self, res) def create_model_config( From fe4b191a86a9c2916d2443cdfd64de709730fd5d Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 2 Oct 2024 16:47:51 -0500 Subject: [PATCH 09/18] Fix tests --- libs/labelbox/tests/integration/test_data_rows.py | 2 -- libs/labelbox/tests/integration/test_project.py | 1 - 2 files changed, 3 deletions(-) diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index ea66a6073..481385e75 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -1093,8 +1093,6 @@ def test_create_tiled_layer(dataset, tile_content): ] task = dataset.create_data_rows(examples) task.wait_until_done() - task = dataset.create_data_rows(examples) - task.wait_until_done() data_rows = list(dataset.data_rows()) assert len(data_rows) == len(examples) for data_row in data_rows: diff --git a/libs/labelbox/tests/integration/test_project.py b/libs/labelbox/tests/integration/test_project.py index 0f9d66036..ea995c6f6 100644 --- a/libs/labelbox/tests/integration/test_project.py +++ b/libs/labelbox/tests/integration/test_project.py @@ -315,7 +315,6 @@ def test_clone(client, project, rand_gen): assert cloned_project.description == project.description assert cloned_project.media_type == project.media_type - assert cloned_project.queue_mode == project.queue_mode assert ( cloned_project.auto_audit_number_of_labels == project.auto_audit_number_of_labels From beb986866af61b5633870c017886e94042fba7c4 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 1 Oct 2024 22:32:46 -0500 Subject: [PATCH 10/18] Removed assign_feature_schema_ids --- libs/labelbox/src/labelbox/client.py | 4 - .../data/annotation_types/collection.py | 15 -- 
.../labelbox/data/annotation_types/label.py | 36 --- .../data/annotation_types/test_collection.py | 21 -- .../tests/data/annotation_types/test_label.py | 208 ------------------ 5 files changed, 284 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 4498531ce..611871dbb 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -1644,10 +1644,6 @@ def get_data_row_ids_for_global_keys( """ Gets data row ids for a list of global keys. - Deprecation Notice: This function will soon no longer return 'Deleted Data Rows' - as part of the 'results'. Global keys for deleted data rows will soon be placed - under 'Data Row not found' portion. - Args: A list of global keys Returns: diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index 2e76176a8..51bcce1b2 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -25,21 +25,6 @@ def __init__(self, data: Generator[Label, None, None], *args, **kwargs): self._fns = {} super().__init__(data, *args, **kwargs) - def assign_feature_schema_ids( - self, ontology_builder: "ontology.OntologyBuilder" - ) -> "LabelGenerator": - def _assign_ids(label: Label): - label.assign_feature_schema_ids(ontology_builder) - return label - - warnings.warn( - "This method is deprecated and will be " - "removed in a future release. Feature schema ids" - " are no longer required for importing." - ) - self._fns["assign_feature_schema_ids"] = _assign_ids - return self - def add_url_to_masks( self, signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 8ae05f898..2f835b23c 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -136,42 +136,6 @@ def create_data_row( self.data.external_id = data_row.external_id return self - def assign_feature_schema_ids( - self, ontology_builder: ontology.OntologyBuilder - ) -> "Label": - """ - Adds schema ids to all FeatureSchema objects in the Labels. - - Args: - ontology_builder: The ontology that matches the feature names assigned to objects in this dataset - Returns: - Label. useful for chaining these modifying functions - - Note: You can now import annotations using names directly without having to lookup schema_ids - """ - warnings.warn( - "This method is deprecated and will be " - "removed in a future release. Feature schema ids" - " are no longer required for importing." - ) - tool_lookup, classification_lookup = get_feature_schema_lookup( - ontology_builder - ) - for annotation in self.annotations: - if isinstance(annotation, ClassificationAnnotation): - self._assign_or_raise(annotation, classification_lookup) - self._assign_option(annotation, classification_lookup) - elif isinstance(annotation, ObjectAnnotation): - self._assign_or_raise(annotation, tool_lookup) - for classification in annotation.classifications: - self._assign_or_raise(classification, classification_lookup) - self._assign_option(classification, classification_lookup) - else: - raise TypeError( - f"Unexpected type found for annotation. 
{type(annotation)}" - ) - return self - def _assign_or_raise(self, annotation, lookup: Dict[str, str]) -> None: if annotation.feature_schema_id is not None: return diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index f818b94ff..e0fa7bd53 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -72,27 +72,6 @@ def test_conversion(list_of_labels): assert [x for x in label_collection] == list_of_labels -def test_adding_schema_ids(): - name = "line_feature" - label = Label( - data=GenericDataRowData(uid="123456"), - annotations=[ - ObjectAnnotation( - value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), - name=name, - ) - ], - ) - feature_schema_id = "expected_id" - ontology = OntologyBuilder( - tools=[ - Tool(Tool.Type.LINE, name=name, feature_schema_id=feature_schema_id) - ] - ) - generator = LabelGenerator([label]).assign_feature_schema_ids(ontology) - assert next(generator).annotations[0].feature_schema_id == feature_schema_id - - def test_adding_to_masks(signer): label = Label( data=GenericDataRowData(uid="12345"), diff --git a/libs/labelbox/tests/data/annotation_types/test_label.py b/libs/labelbox/tests/data/annotation_types/test_label.py index 8439837ed..9cd992b0c 100644 --- a/libs/labelbox/tests/data/annotation_types/test_label.py +++ b/libs/labelbox/tests/data/annotation_types/test_label.py @@ -23,214 +23,6 @@ import pytest -def test_schema_assignment_geometry(): - name = "line_feature" - label = Label( - data=MaskData( - arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" - ), - annotations=[ - ObjectAnnotation( - value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), - name=name, - ) - ], - ) - feature_schema_id = "expected_id" - ontology = OntologyBuilder( - tools=[ - Tool(Tool.Type.LINE, name=name, feature_schema_id=feature_schema_id) - ] - ) - label.assign_feature_schema_ids(ontology) - - assert label.annotations[0].feature_schema_id == feature_schema_id - - -def test_schema_assignment_classification(): - radio_name = "radio_name" - text_name = "text_name" - option_name = "my_option" - - label = Label( - data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), - annotations=[ - ClassificationAnnotation( - value=Radio(answer=ClassificationAnswer(name=option_name)), - name=radio_name, - ), - ClassificationAnnotation( - value=Text(answer="some text"), name=text_name - ), - ], - ) - radio_schema_id = "radio_schema_id" - text_schema_id = "text_schema_id" - option_schema_id = "option_schema_id" - ontology = OntologyBuilder( - tools=[], - classifications=[ - OClassification( - class_type=OClassification.Type.RADIO, - name=radio_name, - feature_schema_id=radio_schema_id, - options=[ - Option( - value=option_name, feature_schema_id=option_schema_id - ) - ], - ), - OClassification( - class_type=OClassification.Type.TEXT, - name=text_name, - feature_schema_id=text_schema_id, - ), - ], - ) - label.assign_feature_schema_ids(ontology) - assert label.annotations[0].feature_schema_id == radio_schema_id - assert label.annotations[1].feature_schema_id == text_schema_id - assert ( - label.annotations[0].value.answer.feature_schema_id == option_schema_id - ) - - -def test_schema_assignment_subclass(): - name = "line_feature" - radio_name = "radio_name" - option_name = "my_option" - classification = ClassificationAnnotation( - name=radio_name, - value=Radio(answer=ClassificationAnswer(name=option_name)), 
- ) - label = Label( - data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), - annotations=[ - ObjectAnnotation( - value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), - name=name, - classifications=[classification], - ) - ], - ) - feature_schema_id = "expected_id" - classification_schema_id = "classification_id" - option_schema_id = "option_schema_id" - ontology = OntologyBuilder( - tools=[ - Tool( - Tool.Type.LINE, - name=name, - feature_schema_id=feature_schema_id, - classifications=[ - OClassification( - class_type=OClassification.Type.RADIO, - name=radio_name, - feature_schema_id=classification_schema_id, - options=[ - Option( - value=option_name, - feature_schema_id=option_schema_id, - ) - ], - ) - ], - ) - ] - ) - label.assign_feature_schema_ids(ontology) - assert label.annotations[0].feature_schema_id == feature_schema_id - assert ( - label.annotations[0].classifications[0].feature_schema_id - == classification_schema_id - ) - assert ( - label.annotations[0].classifications[0].value.answer.feature_schema_id - == option_schema_id - ) - - -def test_highly_nested(): - name = "line_feature" - radio_name = "radio_name" - nested_name = "nested_name" - option_name = "my_option" - nested_option_name = "nested_option_name" - classification = ClassificationAnnotation( - name=radio_name, - value=Radio(answer=ClassificationAnswer(name=option_name)), - classifications=[ - ClassificationAnnotation( - value=Radio( - answer=ClassificationAnswer(name=nested_option_name) - ), - name=nested_name, - ) - ], - ) - label = Label( - data=MaskData( - arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" - ), - annotations=[ - ObjectAnnotation( - value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), - name=name, - classifications=[classification], - ) - ], - ) - feature_schema_id = "expected_id" - classification_schema_id = "classification_id" - nested_classification_schema_id = "nested_classification_schema_id" - option_schema_id = "option_schema_id" - ontology = OntologyBuilder( - tools=[ - Tool( - Tool.Type.LINE, - name=name, - feature_schema_id=feature_schema_id, - classifications=[ - OClassification( - class_type=OClassification.Type.RADIO, - name=radio_name, - feature_schema_id=classification_schema_id, - options=[ - Option( - value=option_name, - feature_schema_id=option_schema_id, - options=[ - OClassification( - class_type=OClassification.Type.RADIO, - name=nested_name, - feature_schema_id=nested_classification_schema_id, - options=[ - Option( - value=nested_option_name, - feature_schema_id=nested_classification_schema_id, - ) - ], - ) - ], - ) - ], - ) - ], - ) - ] - ) - label.assign_feature_schema_ids(ontology) - assert label.annotations[0].feature_schema_id == feature_schema_id - assert ( - label.annotations[0].classifications[0].feature_schema_id - == classification_schema_id - ) - assert ( - label.annotations[0].classifications[0].value.answer.feature_schema_id - == option_schema_id - ) - - def test_schema_assignment_confidence(): name = "line_feature" label = Label( From 18cbc8e15a211ca8a3215ae7ac90299a0d16423d Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 1 Oct 2024 22:49:20 -0500 Subject: [PATCH 11/18] removed rest of deprecated --- .../src/labelbox/schema/asset_attachment.py | 11 +-- .../src/labelbox/schema/data_row_metadata.py | 24 +---- libs/labelbox/src/labelbox/schema/dataset.py | 38 +------ .../src/labelbox/schema/queue_mode.py | 6 -- libs/labelbox/src/labelbox/schema/slice.py | 37 ------- 
.../tests/integration/test_data_rows.py | 98 +------------------ libs/labelbox/tests/unit/test_queue_mode.py | 4 - 7 files changed, 9 insertions(+), 209 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/asset_attachment.py b/libs/labelbox/src/labelbox/schema/asset_attachment.py index 0d5598c84..9a56dbb72 100644 --- a/libs/labelbox/src/labelbox/schema/asset_attachment.py +++ b/libs/labelbox/src/labelbox/schema/asset_attachment.py @@ -7,15 +7,6 @@ class AttachmentType(str, Enum): - @classmethod - def __missing__(cls, value: object): - if str(value) == "TEXT": - warnings.warn( - "The TEXT attachment type is deprecated. Use RAW_TEXT instead." - ) - return cls.RAW_TEXT - return value - VIDEO = "VIDEO" IMAGE = "IMAGE" IMAGE_OVERLAY = "IMAGE_OVERLAY" @@ -30,7 +21,7 @@ class AssetAttachment(DbObject): """Asset attachment provides extra context about an asset while labeling. Attributes: - attachment_type (str): IMAGE, VIDEO, IMAGE_OVERLAY, HTML, RAW_TEXT, TEXT_URL, or PDF_URL. TEXT attachment type is deprecated. + attachment_type (str): IMAGE, VIDEO, IMAGE_OVERLAY, HTML, RAW_TEXT, TEXT_URL, or PDF_URL. attachment_value (str): URL to an external file or a string of text attachment_name (str): The name of the attachment """ diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py index 2fd90e2f9..8bed7d6f5 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py +++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py @@ -1,5 +1,4 @@ # type: ignore -import warnings from copy import deepcopy from datetime import datetime from enum import Enum @@ -673,29 +672,14 @@ def bulk_delete( if not len(deletes): raise ValueError("The 'deletes' list cannot be empty.") - passed_strings = False - for i, delete in enumerate(deletes): - if isinstance(delete.data_row_id, str): - passed_strings = True - deletes[i] = DeleteDataRowMetadata( - data_row_id=UniqueId(delete.data_row_id), - fields=delete.fields, - ) - elif isinstance(delete.data_row_id, UniqueId): - continue - elif isinstance(delete.data_row_id, GlobalKey): - continue - else: + for delete in enumerate(deletes): + if not isinstance(delete.data_row_id, UniqueId) or not isinstance( + delete.data_row_id, GlobalKey + ): raise ValueError( f"Invalid data row identifier type '{type(delete.data_row_id)}' for '{delete.data_row_id}'" ) - if passed_strings: - warnings.warn( - "Using string for data row id will be deprecated. Please use " - "UniqueId instead." - ) - def _batch_delete( deletes: List[_DeleteBatchDataRowMetadata], ) -> List[DataRowMetadataBatchResponse]: diff --git a/libs/labelbox/src/labelbox/schema/dataset.py b/libs/labelbox/src/labelbox/schema/dataset.py index 6d879e767..ac709a60b 100644 --- a/libs/labelbox/src/labelbox/schema/dataset.py +++ b/libs/labelbox/src/labelbox/schema/dataset.py @@ -166,47 +166,13 @@ def create_data_row(self, items=None, **kwargs) -> "DataRow": return self.client.get_data_row(res[0]["id"]) - def create_data_rows_sync( - self, items, file_upload_thread_count=FILE_UPLOAD_THREAD_COUNT - ) -> None: - """Synchronously bulk upload data rows. - - Use this instead of `Dataset.create_data_rows` for smaller batches of data rows that need to be uploaded quickly. - Cannot use this for uploads containing more than 1000 data rows. - Each data row is also limited to 5 attachments. - - Args: - items (iterable of (dict or str)): - See the docstring for `Dataset._create_descriptor_file` for more information. - Returns: - None. 
If the function doesn't raise an exception then the import was successful. - - Raises: - ResourceCreationError: If the `items` parameter does not conform to - the specification in Dataset._create_descriptor_file or if the server did not accept the - DataRow creation request (unknown reason). - InvalidAttributeError: If there are fields in `items` not valid for - a DataRow. - ValueError: When the upload parameters are invalid - """ - warnings.warn( - "This method is deprecated and will be " - "removed in a future release. Please use create_data_rows instead." - ) - - self._create_data_rows_sync( - items, file_upload_thread_count=file_upload_thread_count - ) - - return None # Return None if no exception is raised - def _create_data_rows_sync( self, items, file_upload_thread_count=FILE_UPLOAD_THREAD_COUNT ) -> "DataUpsertTask": max_data_rows_supported = 1000 if len(items) > max_data_rows_supported: raise ValueError( - f"Dataset.create_data_rows_sync() supports a max of {max_data_rows_supported} data rows." + f"Dataset._create_data_rows_sync() supports a max of {max_data_rows_supported} data rows." " For larger imports use the async function Dataset.create_data_rows()" ) if file_upload_thread_count < 1: @@ -235,8 +201,6 @@ def create_data_rows( ) -> "DataUpsertTask": """Asynchronously bulk upload data rows - Use this instead of `Dataset.create_data_rows_sync` uploads for batches that contain more than 1000 data rows. - Args: items (iterable of (dict or str)) diff --git a/libs/labelbox/src/labelbox/schema/queue_mode.py b/libs/labelbox/src/labelbox/schema/queue_mode.py index 333e92987..fc1e850d9 100644 --- a/libs/labelbox/src/labelbox/schema/queue_mode.py +++ b/libs/labelbox/src/labelbox/schema/queue_mode.py @@ -4,9 +4,3 @@ class QueueMode(str, Enum): Batch = "BATCH" Dataset = "DATA_SET" - - @classmethod - def _missing_(cls, value): - # Parses the deprecated "CATALOG" value back to QueueMode.Batch. - if value == "CATALOG": - return QueueMode.Batch diff --git a/libs/labelbox/src/labelbox/schema/slice.py b/libs/labelbox/src/labelbox/schema/slice.py index 624731024..9a0ae912e 100644 --- a/libs/labelbox/src/labelbox/schema/slice.py +++ b/libs/labelbox/src/labelbox/schema/slice.py @@ -53,43 +53,6 @@ class CatalogSlice(Slice): Represents a Slice used for filtering data rows in Catalog. """ - def get_data_row_ids(self) -> PaginatedCollection: - """ - Fetches all data row ids that match this Slice - - Returns: - A PaginatedCollection of mapping of data row ids to global keys - """ - - warnings.warn( - "get_data_row_ids will be deprecated. Use get_data_row_identifiers instead" - ) - - query_str = """ - query getDataRowIdsBySavedQueryPyApi($id: ID!, $from: String, $first: Int!) 
{ - getDataRowIdsBySavedQuery(input: { - savedQueryId: $id, - after: $from - first: $first - }) { - totalCount - nodes - pageInfo { - endCursor - hasNextPage - } - } - } - """ - return PaginatedCollection( - client=self.client, - query=query_str, - params={"id": str(self.uid)}, - dereferencing=["getDataRowIdsBySavedQuery", "nodes"], - obj_class=lambda _, data_row_id: data_row_id, - cursor_path=["getDataRowIdsBySavedQuery", "pageInfo", "endCursor"], - ) - def get_data_row_identifiers(self) -> PaginatedCollection: """ Fetches all data row ids and global keys (where defined) that match this Slice diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index 7d777a28a..f68f1059e 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -500,8 +500,6 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url): [ ("create_data_rows", "class"), ("create_data_rows", "dict"), - ("create_data_rows_sync", "class"), - ("create_data_rows_sync", "dict"), ("create_data_row", "class"), ("create_data_row", "dict"), ], @@ -539,7 +537,6 @@ def create_data_row(data_rows): CREATION_FUNCTION = { "create_data_rows": dataset.create_data_rows, - "create_data_rows_sync": dataset.create_data_rows_sync, "create_data_row": create_data_row, } data_rows = [METADATA_FIELDS[metadata_obj_type]] @@ -804,49 +801,6 @@ def test_data_row_attachments(dataset, image_url): ) -def test_create_data_rows_sync_attachments(dataset, image_url): - attachments = [ - ("IMAGE", image_url, "image URL"), - ("RAW_TEXT", "test-text", None), - ("IMAGE_OVERLAY", image_url, "Overlay"), - ("HTML", image_url, None), - ] - attachments_per_data_row = 3 - dataset.create_data_rows_sync( - [ - { - "row_data": image_url, - "external_id": "test-id", - "attachments": [ - { - "type": attachment_type, - "value": attachment_value, - "name": attachment_name, - } - for _ in range(attachments_per_data_row) - ], - } - for attachment_type, attachment_value, attachment_name in attachments - ] - ) - data_rows = list(dataset.data_rows()) - assert len(data_rows) == len(attachments) - for data_row in data_rows: - assert len(list(data_row.attachments())) == attachments_per_data_row - - -def test_create_data_rows_sync_mixed_upload(dataset, image_url): - n_local = 100 - n_urls = 100 - with NamedTemporaryFile() as fp: - fp.write("Test data".encode()) - fp.flush() - dataset.create_data_rows_sync( - [{DataRow.row_data: image_url}] * n_urls + [fp.name] * n_local - ) - assert len(list(dataset.data_rows())) == n_local + n_urls - - def test_create_data_row_attachment(data_row): att = data_row.create_attachment( "IMAGE", "https://example.com/image.jpg", "name" @@ -1086,53 +1040,6 @@ def test_data_row_delete_and_create_with_same_global_key( assert task.result[0]["global_key"] == global_key_1 -def test_data_row_bulk_creation_sync_with_unique_global_keys( - dataset, sample_image -): - global_key_1 = str(uuid.uuid4()) - global_key_2 = str(uuid.uuid4()) - global_key_3 = str(uuid.uuid4()) - - dataset.create_data_rows_sync( - [ - {DataRow.row_data: sample_image, DataRow.global_key: global_key_1}, - {DataRow.row_data: sample_image, DataRow.global_key: global_key_2}, - {DataRow.row_data: sample_image, DataRow.global_key: global_key_3}, - ] - ) - - assert {row.global_key for row in dataset.data_rows()} == { - global_key_1, - global_key_2, - global_key_3, - } - - -def test_data_row_bulk_creation_sync_with_same_global_keys( - dataset, sample_image -): - 
global_key_1 = str(uuid.uuid4()) - - with pytest.raises(ResourceCreationError) as exc_info: - dataset.create_data_rows_sync( - [ - { - DataRow.row_data: sample_image, - DataRow.global_key: global_key_1, - }, - { - DataRow.row_data: sample_image, - DataRow.global_key: global_key_1, - }, - ] - ) - - assert len(list(dataset.data_rows())) == 1 - assert list(dataset.data_rows())[0].global_key == global_key_1 - assert "Duplicate global key" in str(exc_info.value) - assert exc_info.value.args[1] # task id - - @pytest.fixture def conversational_data_rows(dataset, conversational_content): examples = [ @@ -1174,7 +1081,7 @@ def test_invalid_media_type(dataset, conversational_content): # TODO: What error kind should this be? It looks like for global key we are # using malformed query. But for invalid contents in FileUploads we use InvalidQueryError with pytest.raises(ResourceCreationError): - dataset.create_data_rows_sync( + dataset.( [{**conversational_content, "media_type": "IMAGE"}] ) @@ -1184,7 +1091,8 @@ def test_create_tiled_layer(dataset, tile_content): {**tile_content, "media_type": "TMS_GEO"}, tile_content, ] - dataset.create_data_rows_sync(examples) + task = dataset.create_data_rows(examples) + task.wait_until_done() data_rows = list(dataset.data_rows()) assert len(data_rows) == len(examples) for data_row in data_rows: diff --git a/libs/labelbox/tests/unit/test_queue_mode.py b/libs/labelbox/tests/unit/test_queue_mode.py index a07b14a54..0711b13af 100644 --- a/libs/labelbox/tests/unit/test_queue_mode.py +++ b/libs/labelbox/tests/unit/test_queue_mode.py @@ -3,10 +3,6 @@ from labelbox.schema.queue_mode import QueueMode -def test_parse_deprecated_catalog(): - assert QueueMode("CATALOG") == QueueMode.Batch - - def test_parse_batch(): assert QueueMode("BATCH") == QueueMode.Batch From daf5bc2baecaad0ca1312f70e293929b5493ae78 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 2 Oct 2024 13:39:46 -0500 Subject: [PATCH 12/18] Fixed bad tests --- .../src/labelbox/schema/data_row_metadata.py | 24 ++++----------- .../test_data_row_delete_metadata.py | 30 ++++--------------- .../tests/integration/test_data_rows.py | 2 +- 3 files changed, 12 insertions(+), 44 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py index 8bed7d6f5..386d5939b 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py +++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py @@ -14,6 +14,7 @@ Type, Union, overload, + get_args, ) from pydantic import ( @@ -27,7 +28,7 @@ ) from typing_extensions import Annotated -from labelbox.schema.identifiable import GlobalKey, UniqueId +from labelbox.schema.identifiable import GlobalKey, UniqueId, DataRowIdentifier from labelbox.schema.identifiables import DataRowIdentifiers, UniqueIds from labelbox.schema.ontology import SchemaId from labelbox.utils import ( @@ -87,7 +88,7 @@ class DataRowMetadata(_CamelCaseMixin): class DeleteDataRowMetadata(_CamelCaseMixin): - data_row_id: Union[str, UniqueId, GlobalKey] = None + data_row_id: Union[UniqueId, GlobalKey] = None fields: List[SchemaId] @@ -646,21 +647,10 @@ def bulk_delete( >>> ) >>> mdo.batch_delete([metadata]) - >>> delete = DeleteDataRowMetadata( - >>> data_row_id="global-key", - >>> fields=[ - >>> "schema-id-1", - >>> "schema-id-2" - >>> ... - >>> ] - >>> ) - >>> mdo.batch_delete([metadata]) - Args: deletes: Data row and schema ids to delete - For data row, we support UniqueId, str, and GlobalKey. 
- If you pass a str, we will assume it is a UniqueId + For data row, we support UniqueId and GlobalKey. Do not pass a mix of data row ids and global keys in the same list Returns: @@ -672,10 +662,8 @@ def bulk_delete( if not len(deletes): raise ValueError("The 'deletes' list cannot be empty.") - for delete in enumerate(deletes): - if not isinstance(delete.data_row_id, UniqueId) or not isinstance( - delete.data_row_id, GlobalKey - ): + for delete in deletes: + if not isinstance(delete.data_row_id, get_args(DataRowIdentifier)): raise ValueError( f"Invalid data row identifier type '{type(delete.data_row_id)}' for '{delete.data_row_id}'" ) diff --git a/libs/labelbox/tests/integration/test_data_row_delete_metadata.py b/libs/labelbox/tests/integration/test_data_row_delete_metadata.py index a2ffd31ba..ad9c9e1ee 100644 --- a/libs/labelbox/tests/integration/test_data_row_delete_metadata.py +++ b/libs/labelbox/tests/integration/test_data_row_delete_metadata.py @@ -121,14 +121,9 @@ def data_row_global_key(data_row): return GlobalKey(data_row.global_key) -@pytest.fixture -def data_row_id_as_str(data_row): - return data_row.uid - - @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], + ["data_row_unique_id", "data_row_global_key"], ) def test_bulk_delete_datarow_metadata( data_row_for_delete, data_row, mdo, request @@ -154,7 +149,7 @@ def test_bulk_delete_datarow_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], + ["data_row_unique_id", "data_row_global_key"], ) def test_bulk_partial_delete_datarow_metadata( data_row_for_delete, data_row, mdo, request @@ -195,21 +190,6 @@ def data_row_unique_ids(big_dataset): return deletes -@pytest.fixture -def data_row_ids_as_str(big_dataset): - deletes = [] - data_row_ids = [dr.uid for dr in big_dataset.data_rows()] - - for data_row_id in data_row_ids: - deletes.append( - DeleteDataRowMetadata( - data_row_id=data_row_id, - fields=[SPLIT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID], - ) - ) - return deletes - - @pytest.fixture def data_row_global_keys(big_dataset): deletes = [] @@ -227,7 +207,7 @@ def data_row_global_keys(big_dataset): @pytest.mark.parametrize( "data_rows_for_delete", - ["data_row_ids_as_str", "data_row_unique_ids", "data_row_global_keys"], + ["data_row_unique_ids", "data_row_global_keys"], ) def test_large_bulk_delete_datarow_metadata( data_rows_for_delete, big_dataset, mdo, request @@ -267,7 +247,7 @@ def test_large_bulk_delete_datarow_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], + ["data_row_unique_id", "data_row_global_key"], ) def test_bulk_delete_datarow_enum_metadata( data_row_for_delete, @@ -304,7 +284,7 @@ def test_bulk_delete_datarow_enum_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], + ["data_row_unique_id", "data_row_global_key"], ) def test_delete_non_existent_schema_id( data_row_for_delete, data_row, mdo, request diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py index f68f1059e..481385e75 100644 --- a/libs/labelbox/tests/integration/test_data_rows.py +++ b/libs/labelbox/tests/integration/test_data_rows.py @@ -1081,7 +1081,7 @@ def test_invalid_media_type(dataset, conversational_content): # TODO: What error kind should this be? 
It looks like for global key we are # using malformed query. But for invalid contents in FileUploads we use InvalidQueryError with pytest.raises(ResourceCreationError): - dataset.( + dataset._create_data_rows_sync( [{**conversational_content, "media_type": "IMAGE"}] ) From af38e1fba6b04488fdcd4f1c6cef18778bb3a682 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 2 Oct 2024 13:52:22 -0500 Subject: [PATCH 13/18] Revert tests --- .../src/labelbox/schema/data_row_metadata.py | 32 +++++++++++++++---- .../test_data_row_delete_metadata.py | 30 ++++++++++++++--- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/data_row_metadata.py b/libs/labelbox/src/labelbox/schema/data_row_metadata.py index 386d5939b..cb45ef57f 100644 --- a/libs/labelbox/src/labelbox/schema/data_row_metadata.py +++ b/libs/labelbox/src/labelbox/schema/data_row_metadata.py @@ -1,4 +1,5 @@ # type: ignore +import warnings from copy import deepcopy from datetime import datetime from enum import Enum @@ -14,7 +15,6 @@ Type, Union, overload, - get_args, ) from pydantic import ( @@ -28,7 +28,7 @@ ) from typing_extensions import Annotated -from labelbox.schema.identifiable import GlobalKey, UniqueId, DataRowIdentifier +from labelbox.schema.identifiable import GlobalKey, UniqueId from labelbox.schema.identifiables import DataRowIdentifiers, UniqueIds from labelbox.schema.ontology import SchemaId from labelbox.utils import ( @@ -88,7 +88,7 @@ class DataRowMetadata(_CamelCaseMixin): class DeleteDataRowMetadata(_CamelCaseMixin): - data_row_id: Union[UniqueId, GlobalKey] = None + data_row_id: Union[str, UniqueId, GlobalKey] = None fields: List[SchemaId] @@ -647,10 +647,21 @@ def bulk_delete( >>> ) >>> mdo.batch_delete([metadata]) + >>> delete = DeleteDataRowMetadata( + >>> data_row_id="global-key", + >>> fields=[ + >>> "schema-id-1", + >>> "schema-id-2" + >>> ... + >>> ] + >>> ) + >>> mdo.batch_delete([metadata]) + Args: deletes: Data row and schema ids to delete - For data row, we support UniqueId and GlobalKey. + For data row, we support UniqueId, str, and GlobalKey. 
+ If you pass a str, we will assume it is a UniqueId Do not pass a mix of data row ids and global keys in the same list Returns: @@ -662,8 +673,17 @@ def bulk_delete( if not len(deletes): raise ValueError("The 'deletes' list cannot be empty.") - for delete in deletes: - if not isinstance(delete.data_row_id, get_args(DataRowIdentifier)): + for i, delete in enumerate(deletes): + if isinstance(delete.data_row_id, str): + deletes[i] = DeleteDataRowMetadata( + data_row_id=UniqueId(delete.data_row_id), + fields=delete.fields, + ) + elif isinstance(delete.data_row_id, UniqueId): + continue + elif isinstance(delete.data_row_id, GlobalKey): + continue + else: raise ValueError( f"Invalid data row identifier type '{type(delete.data_row_id)}' for '{delete.data_row_id}'" ) diff --git a/libs/labelbox/tests/integration/test_data_row_delete_metadata.py b/libs/labelbox/tests/integration/test_data_row_delete_metadata.py index ad9c9e1ee..a2ffd31ba 100644 --- a/libs/labelbox/tests/integration/test_data_row_delete_metadata.py +++ b/libs/labelbox/tests/integration/test_data_row_delete_metadata.py @@ -121,9 +121,14 @@ def data_row_global_key(data_row): return GlobalKey(data_row.global_key) +@pytest.fixture +def data_row_id_as_str(data_row): + return data_row.uid + + @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_unique_id", "data_row_global_key"], + ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], ) def test_bulk_delete_datarow_metadata( data_row_for_delete, data_row, mdo, request @@ -149,7 +154,7 @@ def test_bulk_delete_datarow_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_unique_id", "data_row_global_key"], + ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], ) def test_bulk_partial_delete_datarow_metadata( data_row_for_delete, data_row, mdo, request @@ -190,6 +195,21 @@ def data_row_unique_ids(big_dataset): return deletes +@pytest.fixture +def data_row_ids_as_str(big_dataset): + deletes = [] + data_row_ids = [dr.uid for dr in big_dataset.data_rows()] + + for data_row_id in data_row_ids: + deletes.append( + DeleteDataRowMetadata( + data_row_id=data_row_id, + fields=[SPLIT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID], + ) + ) + return deletes + + @pytest.fixture def data_row_global_keys(big_dataset): deletes = [] @@ -207,7 +227,7 @@ def data_row_global_keys(big_dataset): @pytest.mark.parametrize( "data_rows_for_delete", - ["data_row_unique_ids", "data_row_global_keys"], + ["data_row_ids_as_str", "data_row_unique_ids", "data_row_global_keys"], ) def test_large_bulk_delete_datarow_metadata( data_rows_for_delete, big_dataset, mdo, request @@ -247,7 +267,7 @@ def test_large_bulk_delete_datarow_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_unique_id", "data_row_global_key"], + ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], ) def test_bulk_delete_datarow_enum_metadata( data_row_for_delete, @@ -284,7 +304,7 @@ def test_bulk_delete_datarow_enum_metadata( @pytest.mark.parametrize( "data_row_for_delete", - ["data_row_unique_id", "data_row_global_key"], + ["data_row_id_as_str", "data_row_unique_id", "data_row_global_key"], ) def test_delete_non_existent_schema_id( data_row_for_delete, data_row, mdo, request From 8599b4f434e93f5c735441faee7de6bfb09f3baf Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 2 Oct 2024 15:30:57 -0500 Subject: [PATCH 14/18] Remove queue mode as something to set --- libs/labelbox/src/labelbox/__init__.py | 1 - 
libs/labelbox/src/labelbox/client.py | 10 ++-------- libs/labelbox/src/labelbox/project_validation.py | 4 +--- libs/labelbox/src/labelbox/schema/project.py | 12 ------------ libs/labelbox/src/labelbox/schema/queue_mode.py | 6 ------ libs/labelbox/src/labelbox/schema/user_group.py | 2 -- .../tests/unit/schema/test_user_group.py | 2 -- libs/labelbox/tests/unit/test_project.py | 2 -- libs/labelbox/tests/unit/test_queue_mode.py | 16 ---------------- 9 files changed, 3 insertions(+), 52 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/schema/queue_mode.py delete mode 100644 libs/labelbox/tests/unit/test_queue_mode.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 7f7081947..850aec0be 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -59,7 +59,6 @@ from labelbox.schema.project_resource_tag import ProjectResourceTag from labelbox.schema.media_type import MediaType from labelbox.schema.slice import Slice, CatalogSlice, ModelSlice -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.task_queue import TaskQueue from labelbox.schema.label_score import LabelScore from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 611871dbb..dc0567d7e 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -72,7 +72,6 @@ CONSENSUS_AUTO_AUDIT_PERCENTAGE, QualityMode, ) -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.role import Role from labelbox.schema.search_filters import SearchFilter from labelbox.schema.send_to_annotate_params import ( @@ -465,16 +464,16 @@ def _create(self, db_object_type, data, extra_params={}): data = {**data, **extra_params} query_string, params = query.create(db_object_type, data) + print(query_string) res = self.execute( query_string, params, raise_return_resource_not_found=True ) - if not res: raise LabelboxError( "Failed to create %s" % db_object_type.type_name() ) res = res["create%s" % db_object_type.type_name()] - + print(res) return db_object_type(self, res) def create_model_config( @@ -621,7 +620,6 @@ def create_project( name (str): A name for the project description (str): A short summary for the project media_type (MediaType): The type of assets that this project will accept - queue_mode (Optional[QueueMode]): The queue mode to use quality_modes (Optional[List[QualityMode]]): The quality modes to use (e.g. Benchmark, Consensus). Defaults to Benchmark. is_benchmark_enabled (Optional[bool]): Whether the project supports benchmark. Defaults to None. 
@@ -853,11 +851,7 @@ def create_response_creation_project( return self._create_project(_CoreProjectInput(**input)) def _create_project(self, input: _CoreProjectInput) -> Project: - media_type_value = input.media_type.value - params = input.model_dump(exclude_none=True) - if media_type_value: - params["media_type"] = media_type_value extra_params = { Field.String("dataset_name_or_id"): params.pop( diff --git a/libs/labelbox/src/labelbox/project_validation.py b/libs/labelbox/src/labelbox/project_validation.py index 8940dd161..41f1fa762 100644 --- a/libs/labelbox/src/labelbox/project_validation.py +++ b/libs/labelbox/src/labelbox/project_validation.py @@ -11,7 +11,6 @@ CONSENSUS_AUTO_AUDIT_PERCENTAGE, QualityMode, ) -from labelbox.schema.queue_mode import QueueMode PositiveInt = Annotated[int, Field(gt=0)] @@ -20,7 +19,6 @@ class _CoreProjectInput(BaseModel): name: str description: Optional[str] = None media_type: MediaType - queue_mode: QueueMode = Field(default=QueueMode.Batch, frozen=True) auto_audit_percentage: Optional[float] = None auto_audit_number_of_labels: Optional[int] = None quality_modes: Optional[Set[QualityMode]] = Field( @@ -33,7 +31,7 @@ class _CoreProjectInput(BaseModel): data_row_count: Optional[PositiveInt] = None editor_task_type: Optional[EditorTaskType] = None - model_config = ConfigDict(extra="forbid") + model_config = ConfigDict(extra="forbid", use_enum_values=True) @model_validator(mode="after") def validate_fields(self): diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 0daf3af10..2205a05e2 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -61,7 +61,6 @@ ProjectOverview, ProjectOverviewDetailed, ) -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.resource_tag import ResourceTag from labelbox.schema.task import Task from labelbox.schema.task_queue import TaskQueue @@ -109,7 +108,6 @@ class Project(DbObject, Updateable, Deletable): created_at (datetime) setup_complete (datetime) last_activity_time (datetime) - queue_mode (string) auto_audit_number_of_labels (int) auto_audit_percentage (float) is_benchmark_enabled (bool) @@ -132,7 +130,6 @@ class Project(DbObject, Updateable, Deletable): created_at = Field.DateTime("created_at") setup_complete = Field.DateTime("setup_complete") last_activity_time = Field.DateTime("last_activity_time") - queue_mode = Field.Enum(QueueMode, "queue_mode") auto_audit_number_of_labels = Field.Int("auto_audit_number_of_labels") auto_audit_percentage = Field.Float("auto_audit_percentage") # Bind data_type and allowedMediaTYpe using the GraphQL type MediaType @@ -734,9 +731,6 @@ def create_batch( Raises: lbox.exceptions.ValueError if a project is not batch mode, if the project is auto data generation, if the batch exceeds 100k data rows """ - # @TODO: make this automatic? 
- if self.queue_mode != QueueMode.Batch: - raise ValueError("Project must be in batch mode") if self.is_auto_data_generation(): raise ValueError( @@ -816,9 +810,6 @@ def create_batches( Returns: a task for the created batches """ - if self.queue_mode != QueueMode.Batch: - raise ValueError("Project must be in batch mode") - dr_ids = [] if data_rows is not None: for dr in data_rows: @@ -901,9 +892,6 @@ def create_batches_from_dataset( Returns: a task for the created batches """ - if self.queue_mode != QueueMode.Batch: - raise ValueError("Project must be in batch mode") - if consensus_settings: consensus_settings = ConsensusSettings( **consensus_settings diff --git a/libs/labelbox/src/labelbox/schema/queue_mode.py b/libs/labelbox/src/labelbox/schema/queue_mode.py deleted file mode 100644 index fc1e850d9..000000000 --- a/libs/labelbox/src/labelbox/schema/queue_mode.py +++ /dev/null @@ -1,6 +0,0 @@ -from enum import Enum - - -class QueueMode(str, Enum): - Batch = "BATCH" - Dataset = "DATA_SET" diff --git a/libs/labelbox/src/labelbox/schema/user_group.py b/libs/labelbox/src/labelbox/schema/user_group.py index 2dd9f76ca..2e93b4376 100644 --- a/libs/labelbox/src/labelbox/schema/user_group.py +++ b/libs/labelbox/src/labelbox/schema/user_group.py @@ -14,7 +14,6 @@ from labelbox.schema.media_type import MediaType from labelbox.schema.ontology_kind import EditorTaskType from labelbox.schema.project import Project -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.user import User @@ -411,7 +410,6 @@ def _get_projects_set(self, project_nodes): project_values = defaultdict(lambda: None) project_values["id"] = project["id"] project_values["name"] = project["name"] - project_values["queueMode"] = QueueMode.Batch.value project_values["editorTaskType"] = EditorTaskType.Missing.value project_values["mediaType"] = MediaType.Image.value projects.add(Project(self.client, project_values)) diff --git a/libs/labelbox/tests/unit/schema/test_user_group.py b/libs/labelbox/tests/unit/schema/test_user_group.py index 1df555a64..6bc29048d 100644 --- a/libs/labelbox/tests/unit/schema/test_user_group.py +++ b/libs/labelbox/tests/unit/schema/test_user_group.py @@ -14,7 +14,6 @@ from labelbox.schema.media_type import MediaType from labelbox.schema.ontology_kind import EditorTaskType from labelbox.schema.project import Project -from labelbox.schema.queue_mode import QueueMode from labelbox.schema.user import User from labelbox.schema.user_group import UserGroup, UserGroupColor @@ -32,7 +31,6 @@ def group_project(): project_values = defaultdict(lambda: None) project_values["id"] = "project_id" project_values["name"] = "Test Project" - project_values["queueMode"] = QueueMode.Batch.value project_values["editorTaskType"] = EditorTaskType.Missing.value project_values["mediaType"] = MediaType.Image.value return Project(MagicMock(Client), project_values) diff --git a/libs/labelbox/tests/unit/test_project.py b/libs/labelbox/tests/unit/test_project.py index a8fd87b48..1bc6fa840 100644 --- a/libs/labelbox/tests/unit/test_project.py +++ b/libs/labelbox/tests/unit/test_project.py @@ -21,7 +21,6 @@ def project_entity(): "editorTaskType": "MODEL_CHAT_EVALUATION", "lastActivityTime": "2021-06-01T00:00:00.000Z", "allowedMediaType": "IMAGE", - "queueMode": "BATCH", "setupComplete": "2021-06-01T00:00:00.000Z", "modelSetupComplete": None, "uploadType": "Auto", @@ -62,7 +61,6 @@ def test_project_editor_task_type( "editorTaskType": api_editor_task_type, "lastActivityTime": "2021-06-01T00:00:00.000Z", "allowedMediaType": 
"IMAGE", - "queueMode": "BATCH", "setupComplete": "2021-06-01T00:00:00.000Z", "modelSetupComplete": None, "uploadType": "Auto", diff --git a/libs/labelbox/tests/unit/test_queue_mode.py b/libs/labelbox/tests/unit/test_queue_mode.py deleted file mode 100644 index 0711b13af..000000000 --- a/libs/labelbox/tests/unit/test_queue_mode.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -from labelbox.schema.queue_mode import QueueMode - - -def test_parse_batch(): - assert QueueMode("BATCH") == QueueMode.Batch - - -def test_parse_data_set(): - assert QueueMode("DATA_SET") == QueueMode.Dataset - - -def test_fails_for_unknown(): - with pytest.raises(ValueError): - QueueMode("foo") From 73a4acbb638571be9e181c9c263fb1b48a6acd68 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 2 Oct 2024 15:32:03 -0500 Subject: [PATCH 15/18] Remove print --- libs/labelbox/src/labelbox/client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index dc0567d7e..aa08ab0b3 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -464,7 +464,6 @@ def _create(self, db_object_type, data, extra_params={}): data = {**data, **extra_params} query_string, params = query.create(db_object_type, data) - print(query_string) res = self.execute( query_string, params, raise_return_resource_not_found=True ) @@ -473,7 +472,6 @@ def _create(self, db_object_type, data, extra_params={}): "Failed to create %s" % db_object_type.type_name() ) res = res["create%s" % db_object_type.type_name()] - print(res) return db_object_type(self, res) def create_model_config( From 584ae44ab947aed70b990634f81381f5252af64f Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Wed, 2 Oct 2024 16:47:51 -0500 Subject: [PATCH 16/18] Fix tests --- libs/labelbox/tests/integration/test_project.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/labelbox/tests/integration/test_project.py b/libs/labelbox/tests/integration/test_project.py index 0f9d66036..ea995c6f6 100644 --- a/libs/labelbox/tests/integration/test_project.py +++ b/libs/labelbox/tests/integration/test_project.py @@ -315,7 +315,6 @@ def test_clone(client, project, rand_gen): assert cloned_project.description == project.description assert cloned_project.media_type == project.media_type - assert cloned_project.queue_mode == project.queue_mode assert ( cloned_project.auto_audit_number_of_labels == project.auto_audit_number_of_labels From 104ea69e4d4af3528306d822347ca96cad8910bb Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Tue, 8 Oct 2024 15:52:35 -0500 Subject: [PATCH 17/18] Update dataset.py --- libs/labelbox/src/labelbox/schema/dataset.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/dataset.py b/libs/labelbox/src/labelbox/schema/dataset.py index ac709a60b..c3934e878 100644 --- a/libs/labelbox/src/labelbox/schema/dataset.py +++ b/libs/labelbox/src/labelbox/schema/dataset.py @@ -169,12 +169,7 @@ def create_data_row(self, items=None, **kwargs) -> "DataRow": def _create_data_rows_sync( self, items, file_upload_thread_count=FILE_UPLOAD_THREAD_COUNT ) -> "DataUpsertTask": - max_data_rows_supported = 1000 - if len(items) > max_data_rows_supported: - raise ValueError( - f"Dataset._create_data_rows_sync() supports a max of {max_data_rows_supported} data rows." 
- " For larger imports use the async function Dataset.create_data_rows()" - ) + if file_upload_thread_count < 1: raise ValueError( "file_upload_thread_count must be a positive integer" From dd660e6cd81778a8d16a61505b66ca65eab7460e Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:12:03 -0500 Subject: [PATCH 18/18] Fixed bad test --- libs/labelbox/tests/integration/test_dates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/labelbox/tests/integration/test_dates.py b/libs/labelbox/tests/integration/test_dates.py index 3bc24bbdd..f4e2364c7 100644 --- a/libs/labelbox/tests/integration/test_dates.py +++ b/libs/labelbox/tests/integration/test_dates.py @@ -24,6 +24,6 @@ def test_utc_conversion(project): tz = timezone(timedelta(hours=6)) # +6 timezone project.update(setup_complete=datetime.now(timezone.utc).replace(tzinfo=tz)) diff = datetime.now(timezone.utc) - project.setup_complete.replace( - tzinfo=None + tzinfo=timezone.utc ) assert diff > timedelta(hours=5, minutes=58)
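
Postscript: usage sketches for the API surface this series leaves behind. Illustrative only; the API key, dataset id, data row id, metadata schema id, and asset URL below are placeholders, not values taken from the patches.

With Dataset.create_data_rows_sync removed, small uploads go through the async path that the updated tests already exercise (create_data_rows plus wait_until_done):

    from labelbox import Client

    client = Client(api_key="<LABELBOX_API_KEY>")  # placeholder credentials
    dataset = client.get_dataset("<dataset-id>")   # placeholder id

    # Async upload; the returned task is polled to completion, mirroring
    # the pattern used in test_create_tiled_layer above.
    task = dataset.create_data_rows(
        [{"row_data": "https://example.com/image.png", "global_key": "img-1"}]
    )
    task.wait_until_done()
    if task.errors:
        raise RuntimeError(task.errors)

And with string data row ids re-admitted by the "Revert tests" patch, bulk_delete accepts a plain str (coerced to UniqueId, with a deprecation warning) or an explicit identifier:

    from labelbox.schema.data_row_metadata import DeleteDataRowMetadata
    from labelbox.schema.identifiable import GlobalKey, UniqueId

    mdo = client.get_data_row_metadata_ontology()
    mdo.bulk_delete(
        [
            DeleteDataRowMetadata(
                # or UniqueId("<data-row-id>"), or a bare "<data-row-id>" str
                data_row_id=GlobalKey("img-1"),
                fields=["<metadata-schema-id>"],  # placeholder schema id
            )
        ]
    )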