
Commit 6a9a05a

Author: Diego Ardila
Commit message: Merge master
2 parents: b9eab12 + 06a0862

File tree: 12 files changed, +111 −187 lines


.circleci/config.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -48,11 +48,11 @@ jobs:
           path: test_results
       - store_artifacts:
           path: test_results
-
       - slack/notify:
           branch_pattern: master
           event: fail
           template: basic_fail_1
+
   pypi_publish:
     docker:
       - image: cimg/python:3.6
```

README.md

Lines changed: 1 addition & 162 deletions

````diff
@@ -28,168 +28,7 @@ pip install --upgrade scale-nucleus
 
 ## Usage
 
-The first step to using the Nucleus library is instantiating a client object.
-The client abstractions serves to authenticate the user and act as the gateway
-for users to interact with their datasets, models, and model runs.
-
-### Create a client object
-
-```python
-import nucleus
-client = nucleus.NucleusClient("YOUR_API_KEY_HERE")
-```
-
-### Create Dataset
-
-```python
-dataset = client.create_dataset("My Dataset")
-```
-
-### List Datasets
-
-```python
-datasets = client.list_datasets()
-```
-
-### Delete a Dataset
-
-By specifying target dataset id.
-A response code of 200 indicates successful deletion.
-
-```python
-client.delete_dataset("YOUR_DATASET_ID")
-```
-
-### Append Items to a Dataset
-
-You can append both local images and images from the web. Simply specify the location and Nucleus will automatically infer if it's remote or a local file.
-
-```python
-dataset_item_1 = DatasetItem(image_location="./1.jpeg", reference_id="1", metadata={"key": "value"})
-dataset_item_2 = DatasetItem(image_location="s3://srikanth-nucleus/9-1.jpg", reference_id="2", metadata={"key": "value"})
-```
-
-The append function expects a list of `DatasetItem` objects to upload, like this:
-
-```python
-response = dataset.append([dataset_item_1, dataset_item_2])
-```
-
-### Get Dataset Info
-
-Tells us the dataset name, number of dataset items, model_runs, and slice_ids.
-
-```python
-dataset.info
-```
-
-### Access Dataset Items
-
-There are three methods to access individual Dataset Items:
-
-(1) Dataset Items are accessible by reference id
-
-```python
-item = dataset.refloc("my_img_001.png")
-```
-
-(2) Dataset Items are accessible by index
-
-```python
-item = dataset.iloc(0)
-```
-
-(3) Dataset Items are accessible by the dataset_item_id assigned internally
-
-```python
-item = dataset.loc("dataset_item_id")
-```
-
-### Add Annotations
-
-Upload groundtruth annotations for the items in your dataset.
-Box2DAnnotation has same format as https://dashboard.scale.com/nucleus/docs/api#add-ground-truth
-
-```python
-annotation_1 = BoxAnnotation(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_1", metadata={})
-annotation_2 = BoxAnnotation(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_2", metadata={})
-response = dataset.annotate([annotation_1, annotation_2])
-```
-
-For particularly large payloads, please reference the accompanying scripts in **references**
-
-### Add Model
-
-The model abstraction is intended to represent a unique architecture.
-Models are independent of any dataset.
-
-```python
-model = client.add_model(name="My Model", reference_id="newest-cnn-its-new", metadata={"timestamp": "121012401"})
-```
-
-### Upload Predictions to ModelRun
-
-This method populates the model_run object with predictions. `ModelRun` objects need to reference a `Dataset` that has been created.
-Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
-Takes a list of Box2DPredictions within the payload, where Box2DPrediction
-is formulated as in https://dashboard.scale.com/nucleus/docs/api#upload-model-outputs
-
-```python
-prediction_1 = BoxPrediction(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_1", confidence=0.9)
-prediction_2 = BoxPrediction(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_2", confidence=0.2)
-
-model_run = model.create_run(name="My Model Run", metadata={"timestamp": "121012401"}, dataset=dataset, predictions=[prediction_1, prediction_2])
-```
-
-### Commit ModelRun
-
-The commit action indicates that the user is finished uploading predictions associated
-with this model run. Committing a model run kicks off Nucleus internal processes
-to calculate performance metrics like IoU. After being committed, a ModelRun object becomes immutable.
-
-```python
-model_run.commit()
-```
-
-### Get ModelRun Info
-
-Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
-
-```python
-model_run.info
-```
-
-### Accessing ModelRun Predictions
-
-You can access the modelRun predictions for an individual dataset_item through three methods:
-
-(1) user specified reference_id
-
-```python
-model_run.refloc("my_img_001.png")
-```
-
-(2) Index
-
-```python
-model_run.iloc(0)
-```
-
-(3) Internally maintained dataset_item_id
-
-```python
-model_run.loc("dataset_item_id")
-```
-
-### Delete ModelRun
-
-Delete a model run using the target model_run_id.
-
-A response code of 200 indicates successful deletion.
-
-```python
-client.delete_model_run("model_run_id")
-```
+For the most up to date documentation, reference: https://dashboard.scale.com/nucleus/docs/api?language=python.
 
 ## For Developers
 
````

nucleus/__init__.py

Lines changed: 26 additions & 0 deletions

```diff
@@ -86,11 +86,16 @@
     ERROR_ITEMS,
     ERROR_PAYLOAD,
     ERRORS_KEY,
+    JOB_ID_KEY,
+    JOB_LAST_KNOWN_STATUS_KEY,
+    JOB_TYPE_KEY,
+    JOB_CREATION_TIME_KEY,
     IMAGE_KEY,
     IMAGE_URL_KEY,
     ITEM_METADATA_SCHEMA_KEY,
     ITEMS_KEY,
     KEEP_HISTORY_KEY,
+    MESSAGE_KEY,
     MODEL_RUN_ID_KEY,
     NAME_KEY,
     NUCLEUS_ENDPOINT,
@@ -110,6 +115,7 @@
     NotFoundError,
     NucleusAPIError,
 )
+from .job import AsyncJob
 from .model import Model
 from .model_run import ModelRun
 from .payload_constructor import (
@@ -199,6 +205,26 @@ def list_datasets(self) -> Dict[str, Union[str, List[str]]]:
         """
         return self.make_request({}, "dataset/", requests.get)
 
+    def list_jobs(
+        self, show_completed=None, date_limit=None
+    ) -> List[AsyncJob]:
+        """
+        Lists jobs for user.
+        :return: jobs
+        """
+        payload = {show_completed: show_completed, date_limit: date_limit}
+        job_objects = self.make_request(payload, "jobs/", requests.get)
+        return [
+            AsyncJob(
+                job_id=job[JOB_ID_KEY],
+                job_last_known_status=job[JOB_LAST_KNOWN_STATUS_KEY],
+                job_type=job[JOB_TYPE_KEY],
+                job_creation_time=job[JOB_CREATION_TIME_KEY],
+                client=self,
+            )
+            for job in job_objects
+        ]
+
     def get_dataset_items(self, dataset_id) -> List[DatasetItem]:
         """
         Gets all the dataset items inside your repo as a json blob.
```
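The new `NucleusClient.list_jobs` maps each payload returned by the `jobs/` endpoint onto an `AsyncJob`. Below is a minimal sketch of that mapping, with the HTTP round trip stubbed out, the `client` field dropped for brevity, and hypothetical job values. One caveat worth flagging: the committed line `payload = {show_completed: show_completed, date_limit: date_limit}` uses the argument values as dictionary keys, so string keys (`"show_completed"`, `"date_limit"`) were probably intended.

```python
from dataclasses import dataclass
from typing import List

# Constant names mirror nucleus/constants.py in this commit.
JOB_ID_KEY = "job_id"
JOB_LAST_KNOWN_STATUS_KEY = "job_last_known_status"
JOB_TYPE_KEY = "job_type"
JOB_CREATION_TIME_KEY = "job_creation_time"


@dataclass
class AsyncJob:
    # Stand-in for nucleus.job.AsyncJob; the real class also carries a client.
    job_id: str
    job_last_known_status: str
    job_type: str
    job_creation_time: str


def parse_jobs(job_objects: List[dict]) -> List[AsyncJob]:
    # Same list comprehension as the new NucleusClient.list_jobs,
    # minus the HTTP request.
    return [
        AsyncJob(
            job_id=job[JOB_ID_KEY],
            job_last_known_status=job[JOB_LAST_KNOWN_STATUS_KEY],
            job_type=job[JOB_TYPE_KEY],
            job_creation_time=job[JOB_CREATION_TIME_KEY],
        )
        for job in job_objects
    ]


# Hypothetical payload shaped like one element of the jobs/ response.
jobs = parse_jobs([{
    "job_id": "job_123",
    "job_last_known_status": "Running",
    "job_type": "uploadDatasetItems",
    "job_creation_time": "2021-05-01T00:00:00Z",
}])
print(jobs[0].job_id)
```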

nucleus/constants.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -42,6 +42,10 @@
 ITEM_METADATA_SCHEMA_KEY = "item_metadata_schema"
 JOB_ID_KEY = "job_id"
 KEEP_HISTORY_KEY = "keep_history"
+JOB_STATUS_KEY = "job_status"
+JOB_LAST_KNOWN_STATUS_KEY = "job_last_known_status"
+JOB_TYPE_KEY = "job_type"
+JOB_CREATION_TIME_KEY = "job_creation_time"
 LABEL_KEY = "label"
 MASK_URL_KEY = "mask_url"
 MESSAGE_KEY = "message"
```

nucleus/dataset.py

Lines changed: 3 additions & 5 deletions

```diff
@@ -23,7 +23,6 @@
     DATASET_SLICES_KEY,
     DEFAULT_ANNOTATION_UPDATE_MODE,
     EXPORTED_ROWS,
-    JOB_ID_KEY,
     NAME_KEY,
     REFERENCE_IDS_KEY,
     REQUEST_ID_KEY,
@@ -181,8 +180,7 @@ def annotate(
                 payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
                 route=f"dataset/{self.id}/annotate?async=1",
             )
-
-            return AsyncJob(response[JOB_ID_KEY], self._client)
+            return AsyncJob.from_json(response, self._client)
 
         return self._client.annotate_dataset(
             self.id, annotations, update=update, batch_size=batch_size
@@ -241,7 +239,7 @@ def append(
                 payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
                 route=f"dataset/{self.id}/append?async=1",
             )
-            return AsyncJob(response[JOB_ID_KEY], self._client)
+            return AsyncJob.from_json(response, self._client)
 
         return self._client.populate_dataset(
             self.id,
@@ -368,4 +366,4 @@ def delete_annotations(
         response = self._client.delete_annotations(
             self.id, reference_ids, keep_history
         )
-        return AsyncJob(response[JOB_ID_KEY], self._client)
+        return AsyncJob.from_json(response, self._client)
```

nucleus/job.py

Lines changed: 26 additions & 5 deletions

```diff
@@ -1,28 +1,39 @@
 from dataclasses import dataclass
 import time
 from typing import Dict, List
-
 import requests
+from nucleus.constants import (
+    JOB_CREATION_TIME_KEY,
+    JOB_ID_KEY,
+    JOB_LAST_KNOWN_STATUS_KEY,
+    JOB_TYPE_KEY,
+    STATUS_KEY,
+)
 
 JOB_POLLING_INTERVAL = 5
 
 
 @dataclass
 class AsyncJob:
-    id: str
+    job_id: str
+    job_last_known_status: str
+    job_type: str
+    job_creation_time: str
     client: "NucleusClient"  # type: ignore # noqa: F821
 
     def status(self) -> Dict[str, str]:
-        return self.client.make_request(
+        response = self.client.make_request(
             payload={},
-            route=f"job/{self.id}",
+            route=f"job/{self.job_id}",
             requests_command=requests.get,
         )
+        self.job_last_known_status = response[STATUS_KEY]
+        return response
 
     def errors(self) -> List[str]:
         return self.client.make_request(
             payload={},
-            route=f"job/{self.id}/errors",
+            route=f"job/{self.job_id}/errors",
             requests_command=requests.get,
         )
 
@@ -42,6 +53,16 @@ def sleep_until_complete(self, verbose_std_out=True):
         if final_status["status"] == "Errored":
             raise JobError(final_status, self)
 
+    @classmethod
+    def from_json(cls, payload: dict, client):
+        return cls(
+            job_id=payload[JOB_ID_KEY],
+            job_last_known_status=payload[JOB_LAST_KNOWN_STATUS_KEY],
+            job_type=payload[JOB_TYPE_KEY],
+            job_creation_time=payload[JOB_CREATION_TIME_KEY],
+            client=client,
+        )
+
 
 class JobError(Exception):
     def __init__(self, job_status: Dict[str, str], job: AsyncJob):
```
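Taken together, the `nucleus/job.py` changes replace the single-field `AsyncJob(id, client)` with a richer dataclass plus a `from_json` classmethod, and make `status()` cache the latest status on the object. A self-contained sketch of that behavior, with `NucleusClient` replaced by a hypothetical stub and the `requests` plumbing omitted:

```python
from dataclasses import dataclass

STATUS_KEY = "status"  # mirrors nucleus/constants.py


@dataclass
class AsyncJob:
    job_id: str
    job_last_known_status: str
    job_type: str
    job_creation_time: str
    client: object

    def status(self) -> dict:
        # As in the diff: fetch the job endpoint, then cache the status.
        response = self.client.make_request(
            payload={}, route=f"job/{self.job_id}"
        )
        self.job_last_known_status = response[STATUS_KEY]
        return response

    @classmethod
    def from_json(cls, payload: dict, client):
        # Call sites now pass the whole response payload, e.g.
        # AsyncJob.from_json(response, self._client), instead of
        # AsyncJob(response[JOB_ID_KEY], self._client).
        return cls(
            job_id=payload["job_id"],
            job_last_known_status=payload["job_last_known_status"],
            job_type=payload["job_type"],
            job_creation_time=payload["job_creation_time"],
            client=client,
        )


class FakeClient:
    # Hypothetical stand-in for NucleusClient.
    def make_request(self, payload, route):
        return {STATUS_KEY: "Completed"}


job = AsyncJob.from_json(
    {
        "job_id": "job_123",
        "job_last_known_status": "Running",
        "job_type": "uploadDatasetItems",
        "job_creation_time": "2021-05-01T00:00:00Z",
    },
    FakeClient(),
)
job.status()
print(job.job_last_known_status)  # cached status refreshed by status()
```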

nucleus/model_run.py

Lines changed: 1 addition & 3 deletions

```diff
@@ -8,7 +8,6 @@
     ANNOTATIONS_KEY,
     BOX_TYPE,
     DEFAULT_ANNOTATION_UPDATE_MODE,
-    JOB_ID_KEY,
     POLYGON_TYPE,
     REQUEST_ID_KEY,
     SEGMENTATION_TYPE,
@@ -115,8 +114,7 @@ def predict(
                 payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
                 route=f"modelRun/{self.model_run_id}/predict?async=1",
             )
-
-            return AsyncJob(response[JOB_ID_KEY], self._client)
+            return AsyncJob.from_json(response, self._client)
         else:
             return self._client.predict(self.model_run_id, annotations, update)
 
```
nucleus/slice.py

Lines changed: 4 additions & 2 deletions

```diff
@@ -6,7 +6,9 @@
 from nucleus.dataset_item import DatasetItem
 from nucleus.job import AsyncJob
 from nucleus.utils import convert_export_payload, format_dataset_item_response
-from nucleus.constants import EXPORTED_ROWS
+from nucleus.constants import (
+    EXPORTED_ROWS,
+)
 
 
 class Slice:
@@ -122,7 +124,7 @@ def send_to_labeling(self, project_id: str):
         response = self._client.make_request(
             {}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
         )
-        return AsyncJob.from_json(response, self._client)
+        return AsyncJob.from_json(response, self._client)
 
 
 def check_annotations_are_in_slice(
```
