Skip to content

Commit 84bbd4a

Browse files
authored
Export Scale task info (#308)
* Add slice and dataset export methods, plus a helper util
* Semver bump and changelog entry
1 parent 19fc293 commit 84bbd4a

File tree

6 files changed

+107
-2
lines changed

6 files changed

+107
-2
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,15 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.12.3](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.12.3) - 2022-06-02
9+
10+
### Added
11+
12+
- New methods to export associated Scale task info at either the item or scene level.
13+
- `Dataset.export_scale_task_info`
14+
- `Slice.export_scale_task_info`
15+
16+
817
## [0.12.2](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.12.2) - 2022-06-02
918

1019
### Added

nucleus/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
AUTOTAGS_KEY = "autotags"
2828
AUTOTAG_SCORE_THRESHOLD = "score_threshold"
2929
EXPORTED_ROWS = "exportedRows"
30+
EXPORTED_SCALE_TASK_INFO_ROWS = "exportedScaleTaskInfoRows"
3031
CAMERA_MODEL_KEY = "camera_model"
3132
CAMERA_PARAMS_KEY = "camera_params"
3233
CLASS_PDF_KEY = "class_pdf"
@@ -111,6 +112,8 @@
111112
REFERENCE_ID_KEY = "reference_id"
112113
BACKEND_REFERENCE_ID_KEY = "ref_id" # TODO(355762): Our backend returns this instead of the "proper" key sometimes.
113114
REQUEST_ID_KEY = "requestId"
115+
SCALE_TASK_INFO_KEY = "scale_task_info"
116+
SCENE_KEY = "scene"
114117
SCENES_KEY = "scenes"
115118
SERIALIZED_REQUEST_KEY = "serialized_request"
116119
SEGMENTATIONS_KEY = "segmentations"

nucleus/dataset.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
convert_export_payload,
1919
format_dataset_item_response,
2020
format_prediction_response,
21+
format_scale_task_info_response,
2122
paginate_generator,
2223
serialize_and_write_to_presigned_url,
2324
)
@@ -1278,6 +1279,36 @@ def export_predictions(self, model):
12781279
)
12791280
return format_prediction_response({ANNOTATIONS_KEY: json_response})
12801281

1282+
def export_scale_task_info(self):
    """Fetch info on the Scale tasks linked to this dataset's items/scenes.

    Returns:
        A list of dicts, each pairing an item or scene with the info of the
        Scale tasks associated with it in this dataset::

            List[{
                "item" | "scene": Union[:class:`DatasetItem`, :class:`Scene`],
                "scale_task_info": {
                    "task_id": str,
                    "subtask_id": str,
                    "task_status": str,
                    "task_audit_status": str,
                    "task_audit_review_comment": Optional[str],
                    "project_name": str,
                    "batch": str,
                    "created_at": str,
                    "completed_at": Optional[str]
                }[]
            }]
    """
    export_route = f"dataset/{self.id}/exportScaleTaskInfo"
    raw_response = self._client.make_request(
        payload=None,
        route=export_route,
        requests_command=requests.get,
    )
    return format_scale_task_info_response(raw_response)
1311+
12811312
def calculate_evaluation_metrics(self, model, options: dict = None):
12821313
"""Starts computation of evaluation metrics for a model on the dataset.
12831314

nucleus/slice.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
KeyErrorDict,
1313
convert_export_payload,
1414
format_dataset_item_response,
15+
format_scale_task_info_response,
1516
paginate_generator,
1617
)
1718

@@ -265,6 +266,37 @@ def export_predictions(
265266
)
266267
return convert_export_payload(api_payload[EXPORTED_ROWS], True)
267268

269+
def export_scale_task_info(self):
    """Fetches info for all linked Scale tasks of items/scenes in the slice.

    Returns:
        A list of dicts, each with two keys, respectively mapping to items/scenes
        and info on their corresponding Scale tasks within the slice::

            List[{
                "item" | "scene": Union[:class:`DatasetItem`, :class:`Scene`],
                "scale_task_info": {
                    "task_id": str,
                    "subtask_id": str,
                    "task_status": str,
                    "task_audit_status": str,
                    "task_audit_review_comment": Optional[str],
                    "project_name": str,
                    "batch": str,
                    "created_at": str,
                    "completed_at": Optional[str]
                }[]
            }]
    """
    response = self._client.make_request(
        payload=None,
        route=f"slice/{self.id}/exportScaleTaskInfo",
        requests_command=requests.get,
    )
    # Keying/deserialization is handled by the shared helper in nucleus.utils.
    return format_scale_task_info_response(response)
299+
268300
def send_to_labeling(self, project_id: str):
269301
"""Send items in the Slice as tasks to a Scale labeling project.
270302

nucleus/utils.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
BOX_TYPE,
2929
CATEGORY_TYPE,
3030
CUBOID_TYPE,
31+
EXPORTED_SCALE_TASK_INFO_ROWS,
3132
ITEM_KEY,
3233
KEYPOINTS_TYPE,
3334
LAST_PAGE,
@@ -39,6 +40,8 @@
3940
POLYGON_TYPE,
4041
PREDICTIONS_KEY,
4142
REFERENCE_ID_KEY,
43+
SCALE_TASK_INFO_KEY,
44+
SCENE_KEY,
4245
SEGMENTATION_TYPE,
4346
)
4447
from .dataset_item import DatasetItem
@@ -161,7 +164,7 @@ def format_dataset_item_response(response: dict) -> dict:
161164
Args:
162165
response: JSON dictionary response from REST endpoint
163166
Returns:
164-
item_dict: A dictionary with two entries, one for the dataset item, and annother
167+
item_dict: A dictionary with two entries, one for the dataset item, and another
165168
for all of the associated annotations.
166169
"""
167170
if ANNOTATIONS_KEY not in response:
@@ -188,6 +191,33 @@ def format_dataset_item_response(response: dict) -> dict:
188191
}
189192

190193

194+
def format_scale_task_info_response(response: dict) -> Union[Dict, List[Dict]]:
    """Format the raw Scale task info export response into API objects.

    Args:
        response: JSON dictionary response from the REST endpoint.
    Returns:
        A list of dicts, each with an "item" (or "scene") entry and a
        "scale_task_info" entry for the associated Scale tasks. If the
        expected payload key is absent (an error occurred upstream), the
        raw response dict is returned unchanged.
    """
    if EXPORTED_SCALE_TASK_INFO_ROWS not in response:
        # Payload is empty, so an error occurred; surface the raw response.
        return response

    ret = []
    for row in response[EXPORTED_SCALE_TASK_INFO_ROWS]:
        if ITEM_KEY in row:
            # Item-level row: deserialize the raw item into a DatasetItem.
            ret.append(
                {
                    ITEM_KEY: DatasetItem.from_json(row[ITEM_KEY]),
                    SCALE_TASK_INFO_KEY: row[SCALE_TASK_INFO_KEY],
                }
            )
        elif SCENE_KEY in row:
            # Scene-level row: passed through as-is (no scene deserialization here).
            ret.append(row)
    return ret
219+
220+
191221
def convert_export_payload(api_payload, has_predictions: bool = False):
192222
"""Helper function to convert raw JSON to API objects
193223

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.12.2"
24+
version = "0.12.3"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

0 commit comments

Comments
 (0)