
Commit fc0b731

Claire Pajot committed:
Merge branch 'master' into add_classification_type_to_groundtruth
2 parents: 0d1ffd1 + fb9b452

17 files changed, +741 −28 lines

.gitignore (3 additions, 0 deletions)

@@ -134,3 +134,6 @@ dmypy.json
 
 # Poetry lockfile (no need for deploys, best practice is to not check this in)
 poetry.lock
+
+# vscode
+.vscode/

nucleus/__init__.py (10 additions, 0 deletions)

@@ -1157,6 +1157,16 @@ def list_autotags(self, dataset_id: str) -> List[str]:
         )
         return response[AUTOTAGS_KEY] if AUTOTAGS_KEY in response else response
 
+    def delete_autotag(self, autotag_id: str) -> dict:
+        """
+        Deletes an autotag based on autotagId.
+        Returns an empty payload where response status `200` indicates
+        the autotag has been successfully deleted.
+        :param autotag_id: id of the autotag to delete.
+        :return: {}
+        """
+        return self.make_request({}, f"autotag/{autotag_id}", requests.delete)
+
     def delete_model(self, model_id: str) -> dict:
         """
         This endpoint deletes the specified model, along with all
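For reference, a minimal sketch of calling the new method; the API key and autotag id are placeholders, not values from this commit:

import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
# A 200 response carries an empty payload ({}).
client.delete_autotag("tag_sample_autotag_id")  # hypothetical autotag id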

nucleus/autocurate.py (new file: 26 additions, 0 deletions)

@@ -0,0 +1,26 @@
+import datetime
+import requests
+from nucleus.constants import (
+    JOB_CREATION_TIME_KEY,
+    JOB_LAST_KNOWN_STATUS_KEY,
+    JOB_TYPE_KEY,
+)
+from nucleus.job import AsyncJob
+
+
+def entropy(name, model_run, client):
+    model_run_ids = [model_run.model_run_id]
+    dataset_id = model_run.dataset_id
+    response = client.make_request(
+        payload={"modelRunIds": model_run_ids},
+        route=f"autocurate/{dataset_id}/single_model_entropy/{name}",
+        requests_command=requests.post,
+    )
+    # TODO: the response should already have the below three fields populated
+    response[JOB_LAST_KNOWN_STATUS_KEY] = "Started"
+    response[JOB_TYPE_KEY] = "autocurateEntropy"
+    response[JOB_CREATION_TIME_KEY] = (
+        datetime.datetime.now().isoformat("T", "milliseconds") + "Z"
+    )
+    job = AsyncJob.from_json(response, client)
+    return job
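A sketch of driving the new helper end to end, assuming the existing NucleusClient.get_model_run accessor and AsyncJob.sleep_until_complete; all ids are placeholders:

import nucleus
from nucleus import autocurate

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
model_run = client.get_model_run("run_sample_id", "ds_sample_id")  # hypothetical ids
# Kicks off server-side entropy autocuration and wraps the response as an AsyncJob.
job = autocurate.entropy("entropy-demo-curation", model_run, client)
job.sleep_until_complete()  # poll until the job leaves the "Started" state

Note that entropy() reads model_run.dataset_id, which is why the nucleus/model_run.py change below promotes _dataset_id to a public attribute.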

nucleus/constants.py (1 addition, 0 deletions)

@@ -87,6 +87,7 @@
 TYPE_KEY = "type"
 UPDATED_ITEMS = "updated_items"
 UPDATE_KEY = "update"
+UPLOAD_TO_SCALE_KEY = "upload_to_scale"
 URL_KEY = "url"
 VERTICES_KEY = "vertices"
 WIDTH_KEY = "width"

nucleus/dataset.py (18 additions, 0 deletions)

@@ -430,6 +430,24 @@ def items_and_annotations(
         )
         return convert_export_payload(api_payload[EXPORTED_ROWS])
 
+    def export_embeddings(
+        self,
+    ) -> List[Dict[str, Union[str, List[float]]]]:
+        """Returns a pd.Dataframe-ready format of dataset embeddings.
+
+        Returns:
+            A list, where each item is a dict with two keys representing a row
+            in the dataset.
+            * One value in the dict is the reference id
+            * The other value is a list of the embedding values
+        """
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"dataset/{self.id}/embeddings",
+            requests_command=requests.get,
+        )
+        return api_payload
+
     def delete_annotations(
         self, reference_ids: list = None, keep_history=False
     ):
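Since the docstring promises a pd.DataFrame-ready shape, a sketch of the round trip; the dataset id is a placeholder, and the dict keys shown in the comment are illustrative, as the diff does not name them:

import pandas as pd

dataset = client.get_dataset("ds_sample_id")  # hypothetical dataset id
rows = dataset.export_embeddings()
# Illustrative row shape: {"reference_id": "img_0001", "embedding_vector": [0.12, -0.48, ...]}
df = pd.DataFrame(rows)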

nucleus/dataset_item.py (11 additions, 8 deletions)

@@ -7,10 +7,10 @@
 
 from .annotation import is_local_path, Point3D
 from .constants import (
-    DATASET_ITEM_ID_KEY,
     IMAGE_URL_KEY,
     METADATA_KEY,
     ORIGINAL_IMAGE_URL_KEY,
+    UPLOAD_TO_SCALE_KEY,
     REFERENCE_ID_KEY,
     TYPE_KEY,
     URL_KEY,
@@ -91,14 +91,19 @@ class DatasetItemType(Enum):
 class DatasetItem:  # pylint: disable=R0902
     image_location: Optional[str] = None
     reference_id: Optional[str] = None
-    item_id: Optional[str] = None
     metadata: Optional[dict] = None
     pointcloud_location: Optional[str] = None
+    upload_to_scale: Optional[bool] = True
 
     def __post_init__(self):
+        assert self.reference_id is not None, "reference_id is required."
         assert bool(self.image_location) != bool(
             self.pointcloud_location
         ), "Must specify exactly one of the image_location, pointcloud_location parameters"
+        if self.pointcloud_location and not self.upload_to_scale:
+            raise NotImplementedError(
+                "Skipping upload to Scale is not currently implemented for pointclouds."
+            )
         self.local = (
             is_local_path(self.image_location) if self.image_location else None
         )
@@ -127,15 +132,14 @@ def from_json(cls, payload: dict, is_scene=False):
             image_location=image_url,
             pointcloud_location=payload.get(POINTCLOUD_URL_KEY, None),
             reference_id=payload.get(REFERENCE_ID_KEY, None),
-            item_id=payload.get(DATASET_ITEM_ID_KEY, None),
             metadata=payload.get(METADATA_KEY, {}),
         )
 
         return cls(
             image_location=image_url,
             reference_id=payload.get(REFERENCE_ID_KEY, None),
-            item_id=payload.get(DATASET_ITEM_ID_KEY, None),
             metadata=payload.get(METADATA_KEY, {}),
+            upload_to_scale=payload.get(UPLOAD_TO_SCALE_KEY, None),
         )
 
     def local_file_exists(self):
@@ -145,10 +149,8 @@ def to_payload(self, is_scene=False) -> dict:
         payload: Dict[str, Any] = {
             METADATA_KEY: self.metadata or {},
         }
-        if self.reference_id:
-            payload[REFERENCE_ID_KEY] = self.reference_id
-        if self.item_id:
-            payload[DATASET_ITEM_ID_KEY] = self.item_id
+
+        payload[REFERENCE_ID_KEY] = self.reference_id
 
         if is_scene:
             if self.image_location:
@@ -163,6 +165,7 @@ def to_payload(self, is_scene=False) -> dict:
                 self.image_location
             ), "Must specify image_location for DatasetItems not in a LidarScene"
             payload[IMAGE_URL_KEY] = self.image_location
+            payload[UPLOAD_TO_SCALE_KEY] = self.upload_to_scale
 
         return payload
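A sketch of constructing an item under the new contract (reference_id now required, item_id removed, upload_to_scale defaulting to True); the URL and ids are placeholders:

from nucleus import DatasetItem  # assuming the package-root re-export

item = DatasetItem(
    image_location="https://example.com/img_0001.jpg",  # placeholder URL
    reference_id="img_0001",  # required; __post_init__ now asserts it
    metadata={"split": "train"},
    upload_to_scale=False,  # reference the image in place rather than copying it to Scale
)
payload = item.to_payload()  # always carries reference_id; includes upload_to_scale for images

Per the __post_init__ guard, combining upload_to_scale=False with a pointcloud_location raises NotImplementedError.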

nucleus/model_run.py (3 additions, 3 deletions)

@@ -31,10 +31,10 @@ class ModelRun:
     def __init__(self, model_run_id: str, dataset_id: str, client):
         self.model_run_id = model_run_id
         self._client = client
-        self._dataset_id = dataset_id
+        self.dataset_id = dataset_id
 
     def __repr__(self):
-        return f"ModelRun(model_run_id='{self.model_run_id}', dataset_id='{self._dataset_id}', client={self._client})"
+        return f"ModelRun(model_run_id='{self.model_run_id}', dataset_id='{self.dataset_id}', client={self._client})"
 
     def __eq__(self, other):
         if self.model_run_id == other.model_run_id:
@@ -115,7 +115,7 @@ def predict(
         check_all_mask_paths_remote(annotations)
 
         request_id = serialize_and_write_to_presigned_url(
-            annotations, self._dataset_id, self._client
+            annotations, self.dataset_id, self._client
         )
         response = self._client.make_request(
             payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},

nucleus/slice.py (18 additions, 0 deletions)

@@ -126,6 +126,24 @@ def send_to_labeling(self, project_id: str):
         )
         return AsyncJob.from_json(response, self._client)
 
+    def export_embeddings(
+        self,
+    ) -> List[Dict[str, Union[str, List[float]]]]:
+        """Returns a pd.Dataframe-ready format of dataset embeddings.
+
+        Returns:
+            A list, where each item is a dict with two keys representing a row
+            in the dataset.
+            * One value in the dict is the reference id
+            * The other value is a list of the embedding values
+        """
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"slice/{self.slice_id}/embeddings",
+            requests_command=requests.get,
+        )
+        return api_payload
+
 
 def check_annotations_are_in_slice(
     annotations: List[Annotation], slice_to_check: Slice
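The slice-level variant mirrors Dataset.export_embeddings, differing only in the route; a sketch assuming the client's get_slice accessor:

slc = client.get_slice("slc_sample_id")  # hypothetical slice id
rows = slc.export_embeddings()  # same row shape as Dataset.export_embeddings()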

pyproject.toml (2 additions, 1 deletion)

@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.1.17"
+version = "0.1.18"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license = "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
@@ -48,6 +48,7 @@ flake8 = "^3.9.1"
 mypy = "^0.812"
 coverage = "^5.5"
 pre-commit = "^2.12.1"
+jupyterlab = "^3.1.10"
 
 [tool.pytest.ini_options]
 markers = [
