Commit 8b7c380

Allow Users to Specify Trained Slice for Model Runs (#417)
1 parent 3fa9810 commit 8b7c380

5 files changed: +40 −13 lines changed

CHANGELOG.md

Lines changed: 8 additions & 2 deletions

```diff
@@ -5,10 +5,17 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.16.12](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.12) - 2023-11-29
+## [0.16.13](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.13) - 2023-12-13
 
 ### Added
+- Added `trained_slice_id` parameter to `dataset.upload_predictions()` to specify the slice ID used to train the model.
+
+### Fixes
+- Fix offset generation for image chips in `dataset.items_and_annotation_chip_generator()`
 
+## [0.16.12](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.12) - 2023-11-29
+
+### Added
 - Added tag support for slices.
 
 Example:
@@ -21,7 +28,6 @@ Example:
 ## [0.16.11](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.11) - 2023-11-22
 
 ### Added
-
 - Added `num_processes` parameter to `dataset.items_and_annotation_chip_generator()` to specify parallel processing.
 - Method to allow for concurrent task fetches for pointcloud data
 
```
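Taken together, the changes let callers record which slice a model was trained on at prediction-upload time. A minimal usage sketch, assuming the client's existing public API (`NucleusClient`, `get_dataset`, `create_model`, `BoxPrediction`); the API key and all IDs below are placeholders:

```python
import nucleus
from nucleus import BoxPrediction

# Placeholders: substitute your own API key, dataset ID, and the
# slc_-prefixed ID of the slice the model was trained on.
client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
dataset = client.get_dataset("ds_sample_dataset_id")
model = client.create_model(name="sample-model", reference_id="sample-model")

predictions = [
    BoxPrediction(
        label="car", x=24, y=16, width=100, height=80,
        reference_id="image_1", confidence=0.9,
    )
]

# New in 0.16.13: tag the run with the slice used for training.
response = dataset.upload_predictions(
    model,
    predictions,
    trained_slice_id="slc_sample_trained_slice_id",
)
```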

nucleus/annotation_uploader.py

Lines changed: 9 additions & 1 deletion

```diff
@@ -57,6 +57,7 @@ def upload(
         update: bool = False,
         remote_files_per_upload_request: int = 20,
         local_files_per_upload_request: int = 10,
+        trained_slice_id: Optional[str] = None,
     ):
         """For more details on parameters and functionality, see dataset.annotate."""
         if local_files_per_upload_request > 10:
@@ -95,6 +96,7 @@ def upload(
                     update,
                     batch_size=remote_files_per_upload_request,
                     segmentation=True,
+                    trained_slice_id=trained_slice_id,
                 )
             )
         if annotations_without_files:
@@ -104,6 +106,7 @@ def upload(
                     update,
                     batch_size=batch_size,
                     segmentation=False,
+                    trained_slice_id=trained_slice_id,
                 )
             )
 
@@ -115,6 +118,7 @@ def make_batched_requests(
         update: bool,
         batch_size: int,
         segmentation: bool,
+        trained_slice_id: Optional[str],
     ):
         batches = [
             annotations[i : i + batch_size]
@@ -125,7 +129,9 @@ def make_batched_requests(
             "Segmentation batches" if segmentation else "Annotation batches"
         )
         for batch in self._client.tqdm_bar(batches, desc=progress_bar_name):
-            payload = construct_annotation_payload(batch, update)
+            payload = construct_annotation_payload(
+                batch, update, trained_slice_id
+            )
             responses.append(
                 self._client.make_request(payload, route=self._route)
             )
@@ -234,9 +240,11 @@ def __init__(
         dataset_id: Optional[str] = None,
         model_id: Optional[str] = None,
         model_run_id: Optional[str] = None,
+        trained_slice_id: Optional[str] = None,
     ):
         super().__init__(dataset_id, client)
         self._client = client
+        self.trained_slice_id = trained_slice_id
         if model_run_id is not None:
             assert model_id is None and dataset_id is None
             self._route = f"modelRun/{model_run_id}/predict"
```

nucleus/constants.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -149,6 +149,7 @@
 TRACK_REFERENCE_ID_KEY = "track_reference_id"
 TRACK_REFERENCE_IDS_KEY = "track_reference_ids"
 TRACKS_KEY = "tracks"
+TRAINED_SLICE_ID_KEY = "trained_slice_id"
 TRUE_POSITIVE_KEY = "true_positive"
 TYPE_KEY = "type"
 UPDATED_ITEMS = "updated_items"
```

nucleus/dataset.py

Lines changed: 18 additions & 10 deletions

```diff
@@ -66,6 +66,7 @@
     SLICE_ID_KEY,
     TRACK_REFERENCE_IDS_KEY,
     TRACKS_KEY,
+    TRAINED_SLICE_ID_KEY,
     UPDATE_KEY,
     VIDEO_URL_KEY,
 )
@@ -1793,6 +1794,7 @@ def upload_predictions(
         batch_size: int = 5000,
         remote_files_per_upload_request: int = 20,
         local_files_per_upload_request: int = 10,
+        trained_slice_id: Optional[str] = None,
     ):
         """Uploads predictions and associates them with an existing :class:`Model`.
 
@@ -1841,19 +1843,20 @@ def upload_predictions(
                 you can try lowering this batch size. This is only relevant for
                 asynchronous=False
             remote_files_per_upload_request: Number of remote files to upload in each
-              request. Segmentations have either local or remote files, if you are
-              getting timeouts while uploading segmentations with remote urls, you
-              should lower this value from its default of 20. This is only relevant for
-              asynchronous=False.
+                request. Segmentations have either local or remote files, if you are
+                getting timeouts while uploading segmentations with remote urls, you
+                should lower this value from its default of 20. This is only relevant for
+                asynchronous=False.
             local_files_per_upload_request: Number of local files to upload in each
-              request. Segmentations have either local or remote files, if you are
-              getting timeouts while uploading segmentations with local files, you
-              should lower this value from its default of 10. The maximum is 10.
-              This is only relevant for asynchronous=False
+                request. Segmentations have either local or remote files, if you are
+                getting timeouts while uploading segmentations with local files, you
+                should lower this value from its default of 10. The maximum is 10.
+                This is only relevant for asynchronous=False
+            trained_slice_id: Nucleus-generated slice ID (starts with ``slc_``) which was used
+                to train the model.
 
         Returns:
             Payload describing the synchronous upload::
-
                 {
                     "dataset_id": str,
                     "model_run_id": str,
@@ -1876,7 +1879,11 @@ def upload_predictions(
                 predictions, self.id, self._client
             )
             response = self._client.make_request(
-                payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
+                payload={
+                    REQUEST_ID_KEY: request_id,
+                    UPDATE_KEY: update,
+                    TRAINED_SLICE_ID_KEY: trained_slice_id,
+                },
                 route=f"dataset/{self.id}/model/{model.id}/uploadPredictions?async=1",
             )
             return AsyncJob.from_json(response, self._client)
@@ -1887,6 +1894,7 @@ def upload_predictions(
                 update=update,
                 remote_files_per_upload_request=remote_files_per_upload_request,
                 local_files_per_upload_request=local_files_per_upload_request,
+                trained_slice_id=trained_slice_id,
             )
 
     def predictions_iloc(self, model, index):
```
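For asynchronous uploads, the new key rides along in the `uploadPredictions?async=1` payload shown above. A hedged sketch reusing the placeholder objects from the earlier example (`asynchronous=True` and `AsyncJob.sleep_until_complete()` are the client's existing API):

```python
# Assuming `dataset`, `model`, and `predictions` from the earlier sketch.
job = dataset.upload_predictions(
    model,
    predictions,
    asynchronous=True,  # routed through uploadPredictions?async=1 above
    trained_slice_id="slc_sample_trained_slice_id",
)
job.sleep_until_complete()  # AsyncJob polls until the upload finishes
```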

nucleus/payload_constructor.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -24,6 +24,7 @@
     SCENES_KEY,
     SEGMENTATIONS_KEY,
     TAXONOMY_NAME_KEY,
+    TRAINED_SLICE_ID_KEY,
     TYPE_KEY,
     UPDATE_KEY,
 )
@@ -76,6 +77,7 @@ def construct_annotation_payload(
         ]
     ],
     update: bool,
+    trained_slice_id: Optional[str],
 ) -> dict:
     annotations = [
         annotation.to_payload()
@@ -92,6 +94,8 @@ def construct_annotation_payload(
     payload[ANNOTATIONS_KEY] = annotations
     if segmentations:
         payload[SEGMENTATIONS_KEY] = segmentations
+    if trained_slice_id:
+        payload[TRAINED_SLICE_ID_KEY] = trained_slice_id
     return payload
 
 
```
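Because the key is attached only when truthy, payloads from callers that omit `trained_slice_id` are unchanged, so older server routes never see an unexpected field. A simplified stand-in for `construct_annotation_payload` (the `build_payload` helper and the literal `"annotations"`/`"update"` keys here are illustrative, not the library's exact payload shape):

```python
from typing import List, Optional

TRAINED_SLICE_ID_KEY = "trained_slice_id"  # mirrors nucleus/constants.py

def build_payload(
    annotations: List[dict], update: bool, trained_slice_id: Optional[str]
) -> dict:
    # The slice ID is attached only when one was actually provided.
    payload = {"annotations": annotations, "update": update}
    if trained_slice_id:
        payload[TRAINED_SLICE_ID_KEY] = trained_slice_id
    return payload

assert TRAINED_SLICE_ID_KEY not in build_payload([], update=False, trained_slice_id=None)
assert build_payload([], False, "slc_123")[TRAINED_SLICE_ID_KEY] == "slc_123"
```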