From 3c74e2e53e49ce015210c99b3b2605a72c6f80c1 Mon Sep 17 00:00:00 2001 From: Tomiii Date: Tue, 10 Sep 2024 04:28:45 +0200 Subject: [PATCH 1/2] * refactor: further cleanup --- libs/labelbox/src/labelbox/schema/batch.py | 53 ---- libs/labelbox/src/labelbox/schema/dataset.py | 52 ---- .../labelbox/src/labelbox/schema/model_run.py | 51 ---- libs/labelbox/src/labelbox/schema/project.py | 154 +---------- .../data/export/legacy/test_export_catalog.py | 19 -- .../export/legacy/test_export_data_rows.py | 29 --- .../data/export/legacy/test_export_dataset.py | 44 ---- .../export/legacy/test_export_model_run.py | 45 ---- .../data/export/legacy/test_export_project.py | 236 ----------------- .../data/export/legacy/test_export_slice.py | 17 -- .../data/export/legacy/test_export_video.py | 244 ------------------ .../data/export/legacy/test_legacy_export.py | 243 ----------------- 12 files changed, 3 insertions(+), 1184 deletions(-) delete mode 100644 libs/labelbox/tests/data/export/legacy/test_export_catalog.py delete mode 100644 libs/labelbox/tests/data/export/legacy/test_export_data_rows.py delete mode 100644 libs/labelbox/tests/data/export/legacy/test_export_dataset.py delete mode 100644 libs/labelbox/tests/data/export/legacy/test_export_model_run.py delete mode 100644 libs/labelbox/tests/data/export/legacy/test_export_project.py delete mode 100644 libs/labelbox/tests/data/export/legacy/test_export_slice.py delete mode 100644 libs/labelbox/tests/data/export/legacy/test_export_video.py delete mode 100644 libs/labelbox/tests/data/export/legacy/test_legacy_export.py diff --git a/libs/labelbox/src/labelbox/schema/batch.py b/libs/labelbox/src/labelbox/schema/batch.py index 313d02c16..7e2b90948 100644 --- a/libs/labelbox/src/labelbox/schema/batch.py +++ b/libs/labelbox/src/labelbox/schema/batch.py @@ -87,59 +87,6 @@ def remove_queued_data_rows(self) -> None: }, experimental=True) - def export_data_rows(self, - timeout_seconds=120, - include_metadata: bool = False) -> Generator: - """ Returns a generator that produces all data rows that are currently - in this batch. - - Note: For efficiency, the data are cached for 30 minutes. Newly created data rows will not appear - until the end of the cache period. - - Args: - timeout_seconds (float): Max waiting time, in seconds. - include_metadata (bool): True to return related DataRow metadata - Returns: - Generator that yields DataRow objects belonging to this batch. - Raises: - LabelboxError: if the export fails or is unable to download within the specified time. - """ - warnings.warn( - "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export", - DeprecationWarning) - - id_param = "batchId" - metadata_param = "includeMetadataInput" - query_str = """mutation GetBatchDataRowsExportUrlPyApi($%s: ID!, $%s: Boolean!) 
- {exportBatchDataRows(data:{batchId: $%s , includeMetadataInput: $%s}) {downloadUrl createdAt status}} - """ % (id_param, metadata_param, id_param, metadata_param) - sleep_time = 2 - while True: - res = self.client.execute(query_str, { - id_param: self.uid, - metadata_param: include_metadata - }) - res = res["exportBatchDataRows"] - if res["status"] == "COMPLETE": - download_url = res["downloadUrl"] - response = requests.get(download_url) - response.raise_for_status() - reader = parser.reader(StringIO(response.text)) - return ( - Entity.DataRow(self.client, result) for result in reader) - elif res["status"] == "FAILED": - raise LabelboxError("Data row export failed.") - - timeout_seconds -= sleep_time - if timeout_seconds <= 0: - raise LabelboxError( - f"Unable to export data rows within {timeout_seconds} seconds." - ) - - logger.debug("Batch '%s' data row export, waiting for server...", - self.uid) - time.sleep(sleep_time) - def delete(self) -> None: """ Deletes the given batch. diff --git a/libs/labelbox/src/labelbox/schema/dataset.py b/libs/labelbox/src/labelbox/schema/dataset.py index eaa37c5b7..a656a4af9 100644 --- a/libs/labelbox/src/labelbox/schema/dataset.py +++ b/libs/labelbox/src/labelbox/schema/dataset.py @@ -337,58 +337,6 @@ def data_row_for_external_id(self, external_id) -> "DataRow": external_id) return data_rows[0] - def export_data_rows(self, - timeout_seconds=120, - include_metadata: bool = False) -> Generator: - """ Returns a generator that produces all data rows that are currently - attached to this dataset. - - Note: For efficiency, the data are cached for 30 minutes. Newly created data rows will not appear - until the end of the cache period. - - Args: - timeout_seconds (float): Max waiting time, in seconds. - include_metadata (bool): True to return related DataRow metadata - Returns: - Generator that yields DataRow objects belonging to this dataset. - Raises: - LabelboxError: if the export fails or is unable to download within the specified time. - """ - warnings.warn( - "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export", - DeprecationWarning) - id_param = "datasetId" - metadata_param = "includeMetadataInput" - query_str = """mutation GetDatasetDataRowsExportUrlPyApi($%s: ID!, $%s: Boolean!) - {exportDatasetDataRows(data:{datasetId: $%s , includeMetadataInput: $%s}) {downloadUrl createdAt status}} - """ % (id_param, metadata_param, id_param, metadata_param) - sleep_time = 2 - while True: - res = self.client.execute(query_str, { - id_param: self.uid, - metadata_param: include_metadata - }) - res = res["exportDatasetDataRows"] - if res["status"] == "COMPLETE": - download_url = res["downloadUrl"] - response = requests.get(download_url) - response.raise_for_status() - reader = parser.reader(StringIO(response.text)) - return ( - Entity.DataRow(self.client, result) for result in reader) - elif res["status"] == "FAILED": - raise LabelboxError("Data row export failed.") - - timeout_seconds -= sleep_time - if timeout_seconds <= 0: - raise LabelboxError( - f"Unable to export data rows within {timeout_seconds} seconds." 
- ) - - logger.debug("Dataset '%s' data row export, waiting for server...", - self.uid) - time.sleep(sleep_time) - def export( self, task_name: Optional[str] = None, diff --git a/libs/labelbox/src/labelbox/schema/model_run.py b/libs/labelbox/src/labelbox/schema/model_run.py index 7f8714008..7eb4662e3 100644 --- a/libs/labelbox/src/labelbox/schema/model_run.py +++ b/libs/labelbox/src/labelbox/schema/model_run.py @@ -461,57 +461,6 @@ def get_config(self) -> Dict[str, Any]: experimental=True) return res["modelRun"]["trainingMetadata"] - @experimental - def export_labels( - self, - download: bool = False, - timeout_seconds: int = 600 - ) -> Optional[Union[str, List[Dict[Any, Any]]]]: - """ - Experimental. To use, make sure client has enable_experimental=True. - - Fetches Labels from the ModelRun - - Args: - download (bool): Returns the url if False - Returns: - URL of the data file with this ModelRun's labels. - If download=True, this instead returns the contents as NDJSON format. - If the server didn't generate during the `timeout_seconds` period, - None is returned. - """ - warnings.warn( - "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export", - DeprecationWarning) - sleep_time = 2 - query_str = """mutation exportModelRunAnnotationsPyApi($modelRunId: ID!) { - exportModelRunAnnotations(data: {modelRunId: $modelRunId}) { - downloadUrl createdAt status - } - } - """ - - while True: - url = self.client.execute( - query_str, {'modelRunId': self.uid}, - experimental=True)['exportModelRunAnnotations']['downloadUrl'] - - if url: - if not download: - return url - else: - response = requests.get(url) - response.raise_for_status() - return parser.loads(response.content) - - timeout_seconds -= sleep_time - if timeout_seconds <= 0: - return None - - logger.debug("ModelRun '%s' label export, waiting for server...", - self.uid) - time.sleep(sleep_time) - def export(self, task_name: Optional[str] = None, params: Optional[ModelRunExportParams] = None) -> ExportTask: diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index a30ff856b..159d06000 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -313,157 +313,6 @@ def labels(self, datasets=None, order_by=None) -> PaginatedCollection: return PaginatedCollection(self.client, query_str, {id_param: self.uid}, ["project", "labels"], Label) - def export_queued_data_rows( - self, - timeout_seconds=120, - include_metadata: bool = False) -> List[Dict[str, str]]: - """ Returns all data rows that are currently enqueued for this project. - - Args: - timeout_seconds (float): Max waiting time, in seconds. - include_metadata (bool): True to return related DataRow metadata - Returns: - Data row fields for all data rows in the queue as json - Raises: - LabelboxError: if the export fails or is unable to download within the specified time. - """ - warnings.warn( - "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export", - DeprecationWarning) - id_param = "projectId" - metadata_param = "includeMetadataInput" - query_str = """mutation GetQueuedDataRowsExportUrlPyApi($%s: ID!, $%s: Boolean!) 
- {exportQueuedDataRows(data:{projectId: $%s , includeMetadataInput: $%s}) {downloadUrl createdAt status} } - """ % (id_param, metadata_param, id_param, metadata_param) - sleep_time = 2 - start_time = time.time() - while True: - res = self.client.execute(query_str, { - id_param: self.uid, - metadata_param: include_metadata - }) - res = res["exportQueuedDataRows"] - if res["status"] == "COMPLETE": - download_url = res["downloadUrl"] - response = requests.get(download_url) - response.raise_for_status() - return parser.loads(response.text) - elif res["status"] == "FAILED": - raise LabelboxError("Data row export failed.") - - current_time = time.time() - if current_time - start_time > timeout_seconds: - raise LabelboxError( - f"Unable to export data rows within {timeout_seconds} seconds." - ) - - logger.debug( - "Project '%s' queued data row export, waiting for server...", - self.uid) - time.sleep(sleep_time) - - def export_labels(self, - download=False, - timeout_seconds=1800, - **kwargs) -> Optional[Union[str, List[Dict[Any, Any]]]]: - """ Calls the server-side Label exporting that generates a JSON - payload, and returns the URL to that payload. - - Will only generate a new URL at a max frequency of 30 min. - - Args: - download (bool): Returns the url if False - timeout_seconds (float): Max waiting time, in seconds. - start (str): Earliest date for labels, formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss" - end (str): Latest date for labels, formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss" - last_activity_start (str): Will include all labels that have had any updates to - data rows, issues, comments, metadata, or reviews since this timestamp. - formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss" - last_activity_end (str): Will include all labels that do not have any updates to - data rows, issues, comments, metadata, or reviews after this timestamp. - formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss" - - Returns: - URL of the data file with this Project's labels. If the server didn't - generate during the `timeout_seconds` period, None is returned. - """ - warnings.warn( - "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export", - DeprecationWarning) - - def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str: - """Returns a concatenated string of the dictionary's keys and values - - The string will be formatted as {key}: 'value' for each key. Value will be inclusive of - quotations while key will not. 
This can be toggled with `value_with_quotes`""" - - quote = "\"" if value_with_quotes else "" - return ",".join([ - f"""{c}: {quote}{dictionary.get(c)}{quote}""" - for c in dictionary - if dictionary.get(c) - ]) - - sleep_time = 2 - id_param = "projectId" - filter_param = "" - filter_param_dict = {} - - if "start" in kwargs or "end" in kwargs: - created_at_dict = { - "start": kwargs.get("start", ""), - "end": kwargs.get("end", "") - } - [validate_datetime(date) for date in created_at_dict.values()] - filter_param_dict["labelCreatedAt"] = "{%s}" % _string_from_dict( - created_at_dict, value_with_quotes=True) - - if "last_activity_start" in kwargs or "last_activity_end" in kwargs: - last_activity_start = kwargs.get('last_activity_start') - last_activity_end = kwargs.get('last_activity_end') - - if last_activity_start: - validate_datetime(str(last_activity_start)) - if last_activity_end: - validate_datetime(str(last_activity_end)) - - filter_param_dict["lastActivityAt"] = "{%s}" % _string_from_dict( - { - "start": last_activity_start, - "end": last_activity_end - }, - value_with_quotes=True) - - if filter_param_dict: - filter_param = """, filters: {%s }""" % (_string_from_dict( - filter_param_dict, value_with_quotes=False)) - - query_str = """mutation GetLabelExportUrlPyApi($%s: ID!) - {exportLabels(data:{projectId: $%s%s}) {downloadUrl createdAt shouldPoll} } - """ % (id_param, id_param, filter_param) - - start_time = time.time() - - while True: - res = self.client.execute(query_str, {id_param: self.uid}) - res = res["exportLabels"] - if not res["shouldPoll"] and res["downloadUrl"] is not None: - url = res['downloadUrl'] - if not download: - return url - else: - response = requests.get(url) - response.raise_for_status() - return response.json() - - current_time = time.time() - if current_time - start_time > timeout_seconds: - return None - - logger.debug("Project '%s' label export, waiting for server...", - self.uid) - time.sleep(sleep_time) - def export( self, task_name: Optional[str] = None, @@ -1944,4 +1793,7 @@ class LabelingParameterOverride(DbObject): "consensus average_benchmark_agreement last_activity_time") LabelerPerformance.__doc__ = ( "Named tuple containing info about a labeler's performance.") +<<<<<<< HEAD +======= +>>>>>>> 804340e8 (* refactor: further cleanup) diff --git a/libs/labelbox/tests/data/export/legacy/test_export_catalog.py b/libs/labelbox/tests/data/export/legacy/test_export_catalog.py deleted file mode 100644 index b5aa72a35..000000000 --- a/libs/labelbox/tests/data/export/legacy/test_export_catalog.py +++ /dev/null @@ -1,19 +0,0 @@ -import pytest - - -@pytest.mark.parametrize('data_rows', [3], indirect=True) -def test_catalog_export_v2(client, export_v2_test_helpers, data_rows): - datarow_filter_size = 2 - data_row_ids = [dr.uid for dr in data_rows] - - params = {"performance_details": False, "label_details": False} - filters = {"data_row_ids": data_row_ids[:datarow_filter_size]} - - task_results = export_v2_test_helpers.run_catalog_export_v2_task( - client, filters=filters, params=params) - - # only 2 datarows should be exported - assert len(task_results) == datarow_filter_size - # only filtered datarows should be exported - assert set([dr['data_row']['id'] for dr in task_results - ]) == set(data_row_ids[:datarow_filter_size]) diff --git a/libs/labelbox/tests/data/export/legacy/test_export_data_rows.py b/libs/labelbox/tests/data/export/legacy/test_export_data_rows.py deleted file mode 100644 index bf8e64d21..000000000 --- 
a/libs/labelbox/tests/data/export/legacy/test_export_data_rows.py +++ /dev/null @@ -1,29 +0,0 @@ -import time -from labelbox import DataRow - - -def test_export_data_rows(client, data_row, wait_for_data_row_processing): - # Ensure created data rows are indexed - data_row = wait_for_data_row_processing(client, data_row) - time.sleep(7) # temp fix for ES indexing delay - - task = DataRow.export_v2(client=client, data_rows=[data_row]) - task.wait_till_done() - assert task.status == "COMPLETE" - assert task.errors is None - assert len(task.result) == 1 - assert task.result[0]["data_row"]["id"] == data_row.uid - - task = DataRow.export_v2(client=client, data_rows=[data_row.uid]) - task.wait_till_done() - assert task.status == "COMPLETE" - assert task.errors is None - assert len(task.result) == 1 - assert task.result[0]["data_row"]["id"] == data_row.uid - - task = DataRow.export_v2(client=client, global_keys=[data_row.global_key]) - task.wait_till_done() - assert task.status == "COMPLETE" - assert task.errors is None - assert len(task.result) == 1 - assert task.result[0]["data_row"]["id"] == data_row.uid diff --git a/libs/labelbox/tests/data/export/legacy/test_export_dataset.py b/libs/labelbox/tests/data/export/legacy/test_export_dataset.py deleted file mode 100644 index e4a0b50c2..000000000 --- a/libs/labelbox/tests/data/export/legacy/test_export_dataset.py +++ /dev/null @@ -1,44 +0,0 @@ -import pytest - - -@pytest.mark.parametrize('data_rows', [3], indirect=True) -def test_dataset_export_v2(export_v2_test_helpers, dataset, data_rows): - data_row_ids = [dr.uid for dr in data_rows] - params = {"performance_details": False, "label_details": False} - task_results = export_v2_test_helpers.run_dataset_export_v2_task( - dataset, params=params) - assert len(task_results) == len(data_row_ids) - assert set([dr['data_row']['id'] for dr in task_results - ]) == set(data_row_ids) - - # testing with a datarow ids filter - datarow_filter_size = 2 - data_row_ids = [dr.uid for dr in data_rows] - - params = {"performance_details": False, "label_details": False} - filters = {"data_row_ids": data_row_ids[:datarow_filter_size]} - - task_results = export_v2_test_helpers.run_dataset_export_v2_task( - dataset, filters=filters, params=params) - - # only 2 datarows should be exported - assert len(task_results) == datarow_filter_size - # only filtered datarows should be exported - assert set([dr['data_row']['id'] for dr in task_results - ]) == set(data_row_ids[:datarow_filter_size]) - - # testing with a global key and a datarow id filter - datarow_filter_size = 2 - global_keys = [dr.global_key for dr in data_rows] - - params = {"performance_details": False, "label_details": False} - filters = {"global_keys": global_keys[:datarow_filter_size]} - - task_results = export_v2_test_helpers.run_dataset_export_v2_task( - dataset, filters=filters, params=params) - - # only 2 datarows should be exported - assert len(task_results) == datarow_filter_size - # only filtered datarows should be exported - assert set([dr['data_row']['global_key'] for dr in task_results - ]) == set(global_keys[:datarow_filter_size]) diff --git a/libs/labelbox/tests/data/export/legacy/test_export_model_run.py b/libs/labelbox/tests/data/export/legacy/test_export_model_run.py deleted file mode 100644 index 7dfd44f0c..000000000 --- a/libs/labelbox/tests/data/export/legacy/test_export_model_run.py +++ /dev/null @@ -1,45 +0,0 @@ -import time - - -def _model_run_export_v2_results(model_run, task_name, params, num_retries=5): - """Export model run results and 
retry if no results are returned.""" - while (num_retries > 0): - task = model_run.export_v2(task_name, params=params) - assert task.name == task_name - task.wait_till_done() - assert task.status == "COMPLETE" - assert task.errors is None - task_results = task.result - if len(task_results) == 0: - num_retries -= 1 - time.sleep(5) - else: - return task_results - return [] - - -def test_model_run_export_v2(model_run_with_data_rows): - model_run, labels = model_run_with_data_rows - label_ids = [label.uid for label in labels] - expected_data_rows = list(model_run.model_run_data_rows()) - - task_name = "test_task" - params = {"media_attributes": True, "predictions": True} - task_results = _model_run_export_v2_results(model_run, task_name, params) - assert len(task_results) == len(expected_data_rows) - - for task_result in task_results: - # Check export param handling - assert 'media_attributes' in task_result and task_result[ - 'media_attributes'] is not None - exported_model_run = task_result['experiments'][ - model_run.model_id]['runs'][model_run.uid] - task_label_ids_set = set( - map(lambda label: label['id'], exported_model_run['labels'])) - task_prediction_ids_set = set( - map(lambda prediction: prediction['id'], - exported_model_run['predictions'])) - for label_id in task_label_ids_set: - assert label_id in label_ids - for prediction_id in task_prediction_ids_set: - assert prediction_id in label_ids diff --git a/libs/labelbox/tests/data/export/legacy/test_export_project.py b/libs/labelbox/tests/data/export/legacy/test_export_project.py deleted file mode 100644 index f7716d5c5..000000000 --- a/libs/labelbox/tests/data/export/legacy/test_export_project.py +++ /dev/null @@ -1,236 +0,0 @@ -import pytest -import uuid -from typing import Tuple - -from labelbox.schema.media_type import MediaType -from labelbox import Project, Dataset -from labelbox.schema.data_row import DataRow -from labelbox.schema.label import Label - -IMAGE_URL = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/potato.jpeg" - - -def test_project_export_v2(client, export_v2_test_helpers, - configured_project_with_label, - wait_for_data_row_processing): - project, dataset, data_row, label = configured_project_with_label - data_row = wait_for_data_row_processing(client, data_row) - label_id = label.uid - - task_name = "test_label_export_v2" - params = { - "include_performance_details": True, - "include_labels": True, - "media_type_override": MediaType.Image, - "project_details": True, - "data_row_details": True - } - - task_results = export_v2_test_helpers.run_project_export_v2_task( - project, task_name=task_name, params=params) - - for task_result in task_results: - task_media_attributes = task_result['media_attributes'] - task_project = task_result['projects'][project.uid] - task_project_label_ids_set = set( - map(lambda prediction: prediction['id'], task_project['labels'])) - task_project_details = task_project['project_details'] - task_data_row = task_result['data_row'] - task_data_row_details = task_data_row['details'] - - assert label_id in task_project_label_ids_set - # data row - assert task_data_row['id'] == data_row.uid - assert task_data_row['external_id'] == data_row.external_id - assert task_data_row['row_data'] == data_row.row_data - - # data row details - assert task_data_row_details['dataset_id'] == dataset.uid - assert task_data_row_details['dataset_name'] == dataset.name - - assert task_data_row_details['last_activity_at'] is not None - assert 
task_data_row_details['created_by'] is not None - - # media attributes - assert task_media_attributes['mime_type'] == data_row.media_attributes[ - 'mimeType'] - - # project name and details - assert task_project['name'] == project.name - batch = next(project.batches()) - assert task_project_details['batch_id'] == batch.uid - assert task_project_details['batch_name'] == batch.name - assert task_project_details['priority'] is not None - assert task_project_details[ - 'consensus_expected_label_count'] is not None - assert task_project_details['workflow_history'] is not None - - # label details - assert task_project['labels'][0]['id'] == label_id - - -def test_project_export_v2_date_filters(client, export_v2_test_helpers, - configured_project_with_label, - wait_for_data_row_processing): - project, _, data_row, label = configured_project_with_label - data_row = wait_for_data_row_processing(client, data_row) - label_id = label.uid - - task_name = "test_label_export_v2_date_filters" - - filters = { - "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - "task_queue_status": "InReview" - } - - # TODO: Right now we don't have a way to test this - include_performance_details = True - params = { - "performance_details": include_performance_details, - "include_labels": True, - "project_details": True, - "media_type_override": MediaType.Image - } - - task_queues = project.task_queues() - - review_queue = next( - tq for tq in task_queues if tq.queue_type == "MANUAL_REVIEW_QUEUE") - project.move_data_rows_to_task_queue([data_row.uid], review_queue.uid) - - task_results = export_v2_test_helpers.run_project_export_v2_task( - project, task_name=task_name, filters=filters, params=params) - - for task_result in task_results: - task_project = task_result['projects'][project.uid] - task_project_label_ids_set = set( - map(lambda prediction: prediction['id'], task_project['labels'])) - assert label_id in task_project_label_ids_set - assert task_project['project_details']['workflow_status'] == 'IN_REVIEW' - - # TODO: Add back in when we have a way to test this - # if include_performance_details: - # assert 'include_performance_details' in task_result and task_result[ - # 'include_performance_details'] is not None - # else: - # assert 'include_performance_details' not in task_result or task_result[ - # 'include_performance_details'] is None - - filters = {"last_activity_at": [None, "2050-01-01 00:00:00"]} - export_v2_test_helpers.run_project_export_v2_task(project, filters=filters) - - filters = {"label_created_at": ["2000-01-01 00:00:00", None]} - export_v2_test_helpers.run_project_export_v2_task(project, filters=filters) - - -def test_project_export_v2_with_iso_date_filters(client, export_v2_test_helpers, - configured_project_with_label, - wait_for_data_row_processing): - project, _, data_row, label = configured_project_with_label - data_row = wait_for_data_row_processing(client, data_row) - label_id = label.uid - - task_name = "test_label_export_v2_with_iso_date_filters" - - filters = { - "last_activity_at": [ - "2000-01-01T00:00:00+0230", "2050-01-01T00:00:00+0230" - ], - "label_created_at": [ - "2000-01-01T00:00:00+0230", "2050-01-01T00:00:00+0230" - ] - } - task_results = export_v2_test_helpers.run_project_export_v2_task( - project, task_name=task_name, filters=filters) - assert label_id == task_results[0]['projects'][ - project.uid]['labels'][0]['id'] - - filters = {"last_activity_at": [None, "2050-01-01T00:00:00+0230"]} - 
task_results = export_v2_test_helpers.run_project_export_v2_task( - project, task_name=task_name, filters=filters) - assert label_id == task_results[0]['projects'][ - project.uid]['labels'][0]['id'] - - filters = {"label_created_at": ["2050-01-01T00:00:00+0230", None]} - task_results = export_v2_test_helpers.run_project_export_v2_task( - project, task_name=task_name, filters=filters) - assert len(task_results) == 0 - - -@pytest.mark.parametrize("data_rows", [3], indirect=True) -def test_project_export_v2_datarows_filter( - export_v2_test_helpers, - configured_batch_project_with_multiple_datarows): - project, _, data_rows = configured_batch_project_with_multiple_datarows - - data_row_ids = [dr.uid for dr in data_rows] - datarow_filter_size = 2 - - filters = { - "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - "data_row_ids": data_row_ids[:datarow_filter_size] - } - params = {"data_row_details": True, "media_type_override": MediaType.Image} - task_results = export_v2_test_helpers.run_project_export_v2_task( - project, filters=filters, params=params) - - # only 2 datarows should be exported - assert len(task_results) == datarow_filter_size - # only filtered datarows should be exported - assert set([dr['data_row']['id'] for dr in task_results - ]) == set(data_row_ids[:datarow_filter_size]) - - global_keys = [dr.global_key for dr in data_rows] - filters = { - "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - "global_keys": global_keys[:datarow_filter_size] - } - params = {"data_row_details": True, "media_type_override": MediaType.Image} - task_results = export_v2_test_helpers.run_project_export_v2_task( - project, filters=filters, params=params) - - # only 2 datarows should be exported - assert len(task_results) == datarow_filter_size - # only filtered datarows should be exported - assert set([dr['data_row']['global_key'] for dr in task_results - ]) == set(global_keys[:datarow_filter_size]) - - -def test_batch_project_export_v2( - configured_batch_project_with_label: Tuple[Project, Dataset, DataRow, - Label], - export_v2_test_helpers, dataset: Dataset, image_url: str): - project, dataset, *_ = configured_batch_project_with_label - - batch = list(project.batches())[0] - filters = { - "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - "batch_ids": [batch.uid], - } - params = { - "include_performance_details": True, - "include_labels": True, - "media_type_override": MediaType.Image - } - task_name = "test_batch_export_v2" - task = dataset.create_data_rows([ - { - "row_data": image_url, - "external_id": "my-image" - }, - ] * 2) - task.wait_till_done() - data_rows = [dr.uid for dr in list(dataset.export_data_rows())] - batch_one = f'batch one {uuid.uuid4()}' - - # This test creates two batches, only one batch should be exporter - # Creatin second batch that will not be used in the export due to the filter: batch_id - project.create_batch(batch_one, data_rows) - - task_results = export_v2_test_helpers.run_project_export_v2_task( - project, task_name=task_name, filters=filters, params=params) - assert (batch.size == len(task_results)) diff --git a/libs/labelbox/tests/data/export/legacy/test_export_slice.py b/libs/labelbox/tests/data/export/legacy/test_export_slice.py deleted file mode 100644 index 2caa6b227..000000000 --- 
a/libs/labelbox/tests/data/export/legacy/test_export_slice.py +++ /dev/null @@ -1,17 +0,0 @@ -import pytest - - -@pytest.mark.skip( - 'Skipping until we have a way to create slices programatically') -def test_export_v2_slice(client): - # Since we don't have CRUD for slices, we'll just use the one that's already there - SLICE_ID = "clk04g1e4000ryb0rgsvy1dty" - slice = client.get_catalog_slice(SLICE_ID) - task = slice.export_v2(params={ - "performance_details": False, - "label_details": True - }) - task.wait_till_done() - assert task.status == "COMPLETE" - assert task.errors is None - assert len(task.result) != 0 diff --git a/libs/labelbox/tests/data/export/legacy/test_export_video.py b/libs/labelbox/tests/data/export/legacy/test_export_video.py deleted file mode 100644 index 3a0cb4149..000000000 --- a/libs/labelbox/tests/data/export/legacy/test_export_video.py +++ /dev/null @@ -1,244 +0,0 @@ -import time - -import pytest -import labelbox as lb -from labelbox.data.annotation_types.data.video import VideoData -import labelbox.types as lb_types -from labelbox.schema.annotation_import import AnnotationImportState - - -@pytest.fixture -def user_id(client): - return client.get_user().uid - - -@pytest.fixture -def org_id(client): - return client.get_organization().uid - - -def test_export_v2_video( - client, - configured_project_without_data_rows, - video_data, - video_data_row, - bbox_video_annotation_objects, - rand_gen, -): - - project = configured_project_without_data_rows - project_id = project.uid - labels = [] - - _, data_row_uids = video_data - project.create_batch( - rand_gen(str), - data_row_uids, # sample of data row objects - 5 # priority between 1(Highest) - 5(lowest) - ) - - for data_row_uid in data_row_uids: - labels = [ - lb_types.Label(data=VideoData(uid=data_row_uid), - annotations=bbox_video_annotation_objects) - ] - - label_import = lb.LabelImport.create_from_objects( - client, project_id, f'test-import-{project_id}', labels) - label_import.wait_until_done() - - assert label_import.state == AnnotationImportState.FINISHED - assert len(label_import.errors) == 0 - - num_retries = 5 - task = None - - while (num_retries > 0): - task = project.export_v2( - params={ - "performance_details": False, - "label_details": True, - "interpolated_frames": True - }) - task.wait_till_done() - assert task.status == "COMPLETE" - assert task.errors is None - if len(task.result) == 0: - num_retries -= 1 - time.sleep(5) - else: - break - - export_data = task.result - data_row_export = export_data[0]['data_row'] - assert data_row_export['global_key'] == video_data_row['global_key'] - assert data_row_export['row_data'] == video_data_row['row_data'] - assert export_data[0]['media_attributes']['mime_type'] == 'video/mp4' - assert export_data[0]['media_attributes'][ - 'frame_rate'] == 10 # as per the video_data fixture - assert export_data[0]['media_attributes'][ - 'frame_count'] == 100 # as per the video_data fixture - expected_export_label = { - 'label_kind': 'Video', - 'version': '1.0.0', - 'id': 'clgjnpysl000xi3zxtnp29fug', - 'label_details': { - 'created_at': '2023-04-16T17:04:23+00:00', - 'updated_at': '2023-04-16T17:04:23+00:00', - 'created_by': 'vbrodsky@labelbox.com', - 'content_last_updated_at': '2023-04-16T17:04:23+00:00', - 'reviews': [] - }, - 'annotations': { - 'frames': { - '13': { - 'objects': { - 'clgjnpyse000ui3zx6fr1d880': { - 'feature_id': 'clgjnpyse000ui3zx6fr1d880', - 'name': 'bbox', - 'annotation_kind': 'VideoBoundingBox', - 'classifications': [{ - 'feature_id': 
'clgjnpyse000vi3zxtgtfh01y', - 'name': 'nested', - 'radio_answer': { - 'feature_id': 'clgjnpyse000wi3zxnxgv53ps', - 'name': 'radio_option_1', - 'classifications': [] - } - }], - 'bounding_box': { - 'top': 98.0, - 'left': 146.0, - 'height': 243.0, - 'width': 236.0 - } - } - }, - 'classifications': [] - }, - '18': { - 'objects': { - 'clgjnpyse000ui3zx6fr1d880': { - 'feature_id': 'clgjnpyse000ui3zx6fr1d880', - 'name': 'bbox', - 'annotation_kind': 'VideoBoundingBox', - 'classifications': [{ - 'feature_id': 'clgjnpyse000vi3zxtgtfh01y', - 'name': 'nested', - 'radio_answer': { - 'feature_id': 'clgjnpyse000wi3zxnxgv53ps', - 'name': 'radio_option_1', - 'classifications': [] - } - }], - 'bounding_box': { - 'top': 98.0, - 'left': 146.0, - 'height': 243.0, - 'width': 236.0 - } - } - }, - 'classifications': [] - }, - '19': { - 'objects': { - 'clgjnpyse000ui3zx6fr1d880': { - 'feature_id': 'clgjnpyse000ui3zx6fr1d880', - 'name': 'bbox', - 'annotation_kind': 'VideoBoundingBox', - 'classifications': [], - 'bounding_box': { - 'top': 98.0, - 'left': 146.0, - 'height': 243.0, - 'width': 236.0 - } - } - }, - 'classifications': [] - } - }, - 'segments': { - 'clgjnpyse000ui3zx6fr1d880': [[13, 13], [18, 19]] - }, - 'key_frame_feature_map': { - 'clgjnpyse000ui3zx6fr1d880': { - '13': True, - '18': False, - '19': True - } - }, - 'classifications': [] - } - } - - project_export_labels = export_data[0]['projects'][project_id]['labels'] - assert (len(project_export_labels) == len(labels) - ) #note we create 1 label per data row, 1 data row so 1 label - export_label = project_export_labels[0] - assert (export_label['label_kind']) == 'Video' - - assert (export_label['label_details'].keys() - ) == expected_export_label['label_details'].keys() - - expected_frames_ids = [ - vannotation.frame for vannotation in bbox_video_annotation_objects - ] - export_annotations = export_label['annotations'] - export_frames = export_annotations['frames'] - export_frames_ids = [int(frame_id) for frame_id in export_frames.keys()] - all_frames_exported = [] - for value in expected_frames_ids: # note need to understand why we are exporting more frames than we created - if value not in export_frames_ids: - all_frames_exported.append(value) - assert (len(all_frames_exported) == 0) - - # BEGINNING OF THE VIDEO INTERPOLATION ASSERTIONS - first_frame_id = bbox_video_annotation_objects[0].frame - last_frame_id = bbox_video_annotation_objects[-1].frame - - # Generate list of frames with frames in between, e.g. 
13, 14, 15, 16, 17, 18, 19 - expected_frame_ids = list(range(first_frame_id, last_frame_id + 1)) - - assert export_frames_ids == expected_frame_ids - - exported_objects_dict = export_frames[str(first_frame_id)]['objects'] - - # Get the label ID - first_exported_label_id = list(exported_objects_dict.keys())[0] - - # Since the bounding box moves to the right, the interpolated frame content should start a little bit more far to the right - assert export_frames[str(first_frame_id + 1)]['objects'][ - first_exported_label_id]['bounding_box']['left'] > export_frames[ - str(first_frame_id - )]['objects'][first_exported_label_id]['bounding_box']['left'] - # But it shouldn't be further than the last frame - assert export_frames[str(first_frame_id + 1)]['objects'][ - first_exported_label_id]['bounding_box']['left'] < export_frames[ - str(last_frame_id - )]['objects'][first_exported_label_id]['bounding_box']['left'] - # END OF THE VIDEO INTERPOLATION ASSERTIONS - - frame_with_nested_classifications = export_frames['13'] - annotation = None - for _, a in frame_with_nested_classifications['objects'].items(): - if a['name'] == 'bbox': - annotation = a - break - assert (annotation is not None) - assert (annotation['annotation_kind'] == 'VideoBoundingBox') - assert (annotation['classifications']) - assert (annotation['bounding_box'] == { - 'top': 98.0, - 'left': 146.0, - 'height': 243.0, - 'width': 236.0 - }) - classifications = annotation['classifications'] - classification = classifications[0]['radio_answer'] - assert (classification['name'] == 'radio_option_1') - subclassifications = classification['classifications'] - # NOTE predictions services does not support nested classifications at the moment, see - # https://labelbox.atlassian.net/browse/AL-5588 - assert (len(subclassifications) == 0) diff --git a/libs/labelbox/tests/data/export/legacy/test_legacy_export.py b/libs/labelbox/tests/data/export/legacy/test_legacy_export.py deleted file mode 100644 index 31ae8ca91..000000000 --- a/libs/labelbox/tests/data/export/legacy/test_legacy_export.py +++ /dev/null @@ -1,243 +0,0 @@ -import uuid -import datetime -import time -import requests -import pytest - -from labelbox.data.annotation_types.annotation import ObjectAnnotation -from labelbox.schema.annotation_import import LabelImport -from labelbox import Dataset, Project - -IMAGE_URL = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/potato.jpeg" - - -@pytest.mark.skip(reason="broken export v1 api, to be retired soon") -def test_export_annotations_nested_checklist( - client, configured_project_with_complex_ontology, - wait_for_data_row_processing): - project, data_row = configured_project_with_complex_ontology - data_row = wait_for_data_row_processing(client, data_row) - ontology = project.ontology().normalized - - tool = ontology["tools"][0] - - nested_check = [ - subc for subc in tool["classifications"] - if subc["name"] == "test-checklist-class" - ][0] - - data = [{ - "uuid": - str(uuid.uuid4()), - "schemaId": - tool['featureSchemaId'], - "dataRow": { - "id": data_row.uid - }, - "bbox": { - "top": 20, - "left": 20, - "height": 50, - "width": 50 - }, - "classifications": [{ - "schemaId": - nested_check["featureSchemaId"], - "answers": [ - { - "schemaId": nested_check["options"][0]["featureSchemaId"] - }, - { - "schemaId": nested_check["options"][1]["featureSchemaId"] - }, - ] - }] - }] - task = LabelImport.create_from_objects(client, project.uid, - f'label-import-{uuid.uuid4()}', data) - task.wait_until_done() - labels = 
project.label_generator() - object_annotation = [ - annot for annot in next(labels).annotations - if isinstance(annot, ObjectAnnotation) - ][0] - - nested_class_answers = object_annotation.classifications[0].value.answer - assert len(nested_class_answers) == 2 - - -@pytest.mark.skip(reason="broken export v1 api, to be retired soon") -def test_export_filtered_dates(client, - configured_project_with_complex_ontology): - project, data_row = configured_project_with_complex_ontology - ontology = project.ontology().normalized - - tool = ontology["tools"][0] - - data = [{ - "uuid": str(uuid.uuid4()), - "schemaId": tool['featureSchemaId'], - "dataRow": { - "id": data_row.uid - }, - "bbox": { - "top": 20, - "left": 20, - "height": 50, - "width": 50 - } - }] - - task = LabelImport.create_from_objects(client, project.uid, - f'label-import-{uuid.uuid4()}', data) - task.wait_until_done() - - regular_export = project.export_labels(download=True) - assert len(regular_export) == 1 - - filtered_export = project.export_labels(download=True, start="2020-01-01") - assert len(filtered_export) == 1 - - filtered_export_with_time = project.export_labels( - download=True, start="2020-01-01 00:00:01") - assert len(filtered_export_with_time) == 1 - - empty_export = project.export_labels(download=True, - start="2020-01-01", - end="2020-01-02") - assert len(empty_export) == 0 - - -@pytest.mark.skip(reason="broken export v1 api, to be retired soon") -def test_export_filtered_activity(client, - configured_project_with_complex_ontology): - project, data_row = configured_project_with_complex_ontology - ontology = project.ontology().normalized - - tool = ontology["tools"][0] - - data = [{ - "uuid": str(uuid.uuid4()), - "schemaId": tool['featureSchemaId'], - "dataRow": { - "id": data_row.uid - }, - "bbox": { - "top": 20, - "left": 20, - "height": 50, - "width": 50 - } - }] - - task = LabelImport.create_from_objects(client, project.uid, - f'label-import-{uuid.uuid4()}', data) - task.wait_until_done() - - regular_export = project.export_labels(download=True) - assert len(regular_export) == 1 - - filtered_export = project.export_labels( - download=True, - last_activity_start="2020-01-01", - last_activity_end=(datetime.datetime.now() + - datetime.timedelta(days=2)).strftime("%Y-%m-%d")) - assert len(filtered_export) == 1 - - filtered_export_with_time = project.export_labels( - download=True, last_activity_start="2020-01-01 00:00:01") - assert len(filtered_export_with_time) == 1 - - empty_export = project.export_labels( - download=True, - last_activity_start=(datetime.datetime.now() + - datetime.timedelta(days=2)).strftime("%Y-%m-%d"), - ) - - empty_export = project.export_labels( - download=True, - last_activity_end=(datetime.datetime.now() - - datetime.timedelta(days=1)).strftime("%Y-%m-%d")) - assert len(empty_export) == 0 - - -def test_export_data_rows(project: Project, dataset: Dataset): - n_data_rows = 2 - task = dataset.create_data_rows([ - { - "row_data": IMAGE_URL, - "external_id": "my-image" - }, - ] * n_data_rows) - task.wait_till_done() - - data_rows = [dr.uid for dr in list(dataset.export_data_rows())] - batch = project.create_batch("batch test", data_rows) - result = list(batch.export_data_rows()) - exported_data_rows = [dr.uid for dr in result] - - assert len(result) == n_data_rows - assert set(data_rows) == set(exported_data_rows) - - -def test_queued_data_row_export(configured_project): - result = configured_project.export_queued_data_rows() - assert len(result) == 1 - - -@pytest.mark.skip(reason="broken export 
v1 api, to be retired soon") -def test_label_export(configured_project_with_label): - project, _, _, label = configured_project_with_label - label_id = label.uid - # Wait for exporter to retrieve latest labels - time.sleep(10) - - # TODO: Move to export_v2 - exported_labels_url = project.export_labels() - assert exported_labels_url is not None - exported_labels = requests.get(exported_labels_url) - labels = [example['ID'] for example in exported_labels.json()] - assert labels[0] == label_id - #TODO: Add test for bulk export back. - # The new exporter doesn't work with the create_label mutation - - -def test_issues_export(project): - exported_issues_url = project.export_issues() - assert exported_issues_url - - exported_issues_url = project.export_issues("Open") - assert exported_issues_url - assert "?status=Open" in exported_issues_url - - exported_issues_url = project.export_issues("Resolved") - assert exported_issues_url - assert "?status=Resolved" in exported_issues_url - - invalidStatusValue = "Closed" - with pytest.raises(ValueError) as exc_info: - exported_issues_url = project.export_issues(invalidStatusValue) - assert "status must be in" in str(exc_info.value) - assert "Found %s" % (invalidStatusValue) in str(exc_info.value) - - -def test_dataset_export(dataset, image_url): - n_data_rows = 2 - ids = set() - for _ in range(n_data_rows): - ids.add(dataset.create_data_row(row_data=image_url)) - result = list(dataset.export_data_rows()) - assert len(result) == n_data_rows - assert set(result) == ids - - -@pytest.mark.skip(reason="broken export v1 api, to be retired soon") -def test_data_row_export_with_empty_media_attributes( - client, configured_project_with_label, wait_for_data_row_processing): - project, _, data_row, _ = configured_project_with_label - data_row = wait_for_data_row_processing(client, data_row) - labels = list(project.label_generator()) - assert len( - labels - ) == 1, "Label export job unexpectedly returned an empty result set`" - assert labels[0].data.media_attributes == {} From 7eef219341a60a510d4a6a7b6fd40e41443d90f8 Mon Sep 17 00:00:00 2001 From: Tomiii Date: Tue, 10 Sep 2024 04:37:16 +0200 Subject: [PATCH 2/2] * refactor: remove tests --- libs/labelbox/src/labelbox/schema/project.py | 4 ---- .../data/annotation_types/test_collection.py | 4 ---- libs/labelbox/tests/integration/test_batch.py | 21 ------------------- 3 files changed, 29 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 159d06000..8a966c39b 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -1793,7 +1793,3 @@ class LabelingParameterOverride(DbObject): "consensus average_benchmark_agreement last_activity_time") LabelerPerformance.__doc__ = ( "Named tuple containing info about a labeler's performance.") -<<<<<<< HEAD - -======= ->>>>>>> 804340e8 (* refactor: further cleanup) diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 34b868162..68c58ed0e 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -46,10 +46,6 @@ def create_data_rows(self, args): def wait_till_done(self): pass - def export_data_rows(self): - for export in self.exports: - yield export - def test_generator(list_of_labels): generator = LabelGenerator([list_of_labels[0]]) diff --git 
a/libs/labelbox/tests/integration/test_batch.py b/libs/labelbox/tests/integration/test_batch.py index d5e3b7a0f..2b7f9bfc9 100644 --- a/libs/labelbox/tests/integration/test_batch.py +++ b/libs/labelbox/tests/integration/test_batch.py @@ -201,27 +201,6 @@ def test_batch_creation_with_processing_timeout( project._wait_processing_max_seconds = stashed_wait_timeout -@pytest.mark.export_v1("export_v1 test remove later") -def test_export_data_rows(project: Project, dataset: Dataset, image_url: str, - external_id: str): - n_data_rows = 2 - task = dataset.create_data_rows([ - { - "row_data": image_url, - "external_id": external_id - }, - ] * n_data_rows) - task.wait_till_done() - - data_rows = [dr.uid for dr in list(dataset.export_data_rows())] - batch = project.create_batch("batch test", data_rows) - result = list(batch.export_data_rows()) - exported_data_rows = [dr.uid for dr in result] - - assert len(result) == n_data_rows - assert set(data_rows) == set(exported_data_rows) - - def test_list_all_batches(project: Project, client, image_url: str): """ Test to verify that we can retrieve all available batches in the project.
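
Migration notes: every method deleted in these two patches is an exports v1 entry point (Batch.export_data_rows, Dataset.export_data_rows, Project.export_labels, Project.export_queued_data_rows, ModelRun.export_labels), and each already emitted a DeprecationWarning pointing callers at exports v2 and flagging retirement after April 30th, 2024. The exports v2 task API that stays in the SDK covers the same ground. Below is a minimal sketch of the project- and dataset-level replacements; the credentials and IDs are placeholders, and the call patterns follow the deleted legacy tests, so exact filter/param support may vary by SDK version.

    import labelbox as lb

    client = lb.Client(api_key="<LB_API_KEY>")    # placeholder credentials
    project = client.get_project("<project-id>")  # placeholder id
    dataset = client.get_dataset("<dataset-id>")  # placeholder id

    # Replaces Project.export_labels(): start an async exports v2 task, poll
    # it, then read the structured result instead of a JSON payload URL.
    task = project.export_v2(
        params={"label_details": True, "performance_details": False},
        filters={"label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]},
    )
    task.wait_till_done()
    assert task.status == "COMPLETE" and task.errors is None
    rows = task.result  # list of dicts keyed by 'data_row', 'projects', ...

    # Replaces Dataset.export_data_rows(): the same task pattern, dataset level.
    task = dataset.export_v2(params={"data_row_details": True})
    task.wait_till_done()
    data_row_ids = [row["data_row"]["id"] for row in task.result]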
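
The batch-, row-, and model-level deletions map the same way. There is no one-for-one batch method in the deleted suites; they either export data rows directly or narrow a project export with a batch_ids filter. Continuing with the placeholder handles from the sketch above, and assuming a ModelRun instance obtained elsewhere in the workflow:

    # Replaces Batch.export_data_rows(): export rows directly by id or global
    # key (the pattern from the deleted test_export_data_rows.py) ...
    task = lb.DataRow.export_v2(client=client, data_rows=["<data-row-id>"])
    task.wait_till_done()
    assert task.status == "COMPLETE" and task.errors is None

    # ... or narrow a project export to a single batch.
    task = project.export_v2(filters={"batch_ids": ["<batch-id>"]})
    task.wait_till_done()

    # Replaces ModelRun.export_labels(): labels plus predictions for one run.
    task = model_run.export_v2("migration-check",
                               params={"media_attributes": True,
                                       "predictions": True})
    task.wait_till_done()
    predictions = task.result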
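
For Project.export_queued_data_rows, the deleted date-filter test exercises a task_queue_status filter on a project export, which looks like the closest v2 analogue; treat the specific status value as an assumption rather than a documented mapping.

    # Possible v2 analogue to Project.export_queued_data_rows(): filter a
    # project export by task queue status ("InReview" is the value the
    # deleted test used after moving rows to the review queue).
    task = project.export_v2(filters={"task_queue_status": "InReview"})
    task.wait_till_done()
    queued = [row["data_row"]["id"] for row in task.result]

Nothing behavioral is lost by these removals beyond the v1 endpoints themselves: the hand-rolled polling loop each deleted method carried (sleep, decrement the timeout, re-query the export status) is exactly what task.wait_till_done() encapsulates, and the successor export() methods that the hunks leave in place already return an ExportTask, as the surviving "def export(...) -> ExportTask" signatures show.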