Skip to content

Commit 0ebb44a

Browse files
authored
Merge pull request #88 from scaleapi/list_jobs
Add list_jobs to nucleus-python-client api
2 parents a876e89 + c5f7bf2 commit 0ebb44a

File tree

11 files changed

+113
-25
lines changed

11 files changed

+113
-25
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,10 @@ dataset = client.create_dataset("My Dataset")
5050
```python
5151
datasets = client.list_datasets()
5252
```
53-
53+
### List Jobs
54+
```python
55+
jobs = client.list_jobs()
56+
```
5457
### Delete a Dataset
5558

5659
By specifying target dataset id.

nucleus/__init__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,16 @@
8686
ERROR_ITEMS,
8787
ERROR_PAYLOAD,
8888
ERRORS_KEY,
89+
JOB_ID_KEY,
90+
JOB_LAST_KNOWN_STATUS_KEY,
91+
JOB_TYPE_KEY,
92+
JOB_CREATION_TIME_KEY,
8993
IMAGE_KEY,
9094
IMAGE_URL_KEY,
9195
ITEM_METADATA_SCHEMA_KEY,
9296
ITEMS_KEY,
9397
KEEP_HISTORY_KEY,
98+
MESSAGE_KEY,
9499
MODEL_RUN_ID_KEY,
95100
NAME_KEY,
96101
NUCLEUS_ENDPOINT,
@@ -110,6 +115,7 @@
110115
NotFoundError,
111116
NucleusAPIError,
112117
)
118+
from .job import AsyncJob
113119
from .model import Model
114120
from .model_run import ModelRun
115121
from .payload_constructor import (
@@ -199,6 +205,26 @@ def list_datasets(self) -> Dict[str, Union[str, List[str]]]:
199205
"""
200206
return self.make_request({}, "dataset/", requests.get)
201207

208+
def list_jobs(
    self, show_completed=None, date_limit=None
) -> List[AsyncJob]:
    """
    Lists jobs for the current user.

    :param show_completed: presumably includes already-completed jobs when
        truthy — TODO confirm against the jobs/ endpoint semantics.
    :param date_limit: presumably restricts results by creation date —
        TODO confirm expected format with the API.
    :return: list of AsyncJob objects, one per job returned by the server.
    """
    # Bug fix: the payload previously used the parameter *values* as dict
    # keys ({show_completed: show_completed, date_limit: date_limit}),
    # which with the defaults collapses to {None: None} and never sends
    # the intended filter keys. Use string keys matching the parameters.
    payload = {"show_completed": show_completed, "date_limit": date_limit}
    job_objects = self.make_request(payload, "jobs/", requests.get)
    # Consistency: build jobs via AsyncJob.from_json, the same constructor
    # used everywhere else in this changeset (dataset.py, slice.py, ...).
    return [AsyncJob.from_json(job, client=self) for job in job_objects]
227+
202228
def get_dataset_items(self, dataset_id) -> List[DatasetItem]:
203229
"""
204230
Gets all the dataset items inside your repo as a json blob.

nucleus/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@
4242
ITEM_METADATA_SCHEMA_KEY = "item_metadata_schema"
4343
JOB_ID_KEY = "job_id"
4444
KEEP_HISTORY_KEY = "keep_history"
45+
JOB_STATUS_KEY = "job_status"
46+
JOB_LAST_KNOWN_STATUS_KEY = "job_last_known_status"
47+
JOB_TYPE_KEY = "job_type"
48+
JOB_CREATION_TIME_KEY = "job_creation_time"
4549
LABEL_KEY = "label"
4650
MASK_URL_KEY = "mask_url"
4751
MESSAGE_KEY = "message"

nucleus/dataset.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
DATASET_SLICES_KEY,
2424
DEFAULT_ANNOTATION_UPDATE_MODE,
2525
EXPORTED_ROWS,
26-
JOB_ID_KEY,
2726
NAME_KEY,
2827
REFERENCE_IDS_KEY,
2928
REQUEST_ID_KEY,
@@ -181,8 +180,7 @@ def annotate(
181180
payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
182181
route=f"dataset/{self.id}/annotate?async=1",
183182
)
184-
185-
return AsyncJob(response[JOB_ID_KEY], self._client)
183+
return AsyncJob.from_json(response, self._client)
186184

187185
return self._client.annotate_dataset(
188186
self.id, annotations, update=update, batch_size=batch_size
@@ -241,7 +239,7 @@ def append(
241239
payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
242240
route=f"dataset/{self.id}/append?async=1",
243241
)
244-
return AsyncJob(response[JOB_ID_KEY], self._client)
242+
return AsyncJob.from_json(response, self._client)
245243

246244
return self._client.populate_dataset(
247245
self.id,
@@ -368,4 +366,4 @@ def delete_annotations(
368366
response = self._client.delete_annotations(
369367
self.id, reference_ids, keep_history
370368
)
371-
return AsyncJob(response[JOB_ID_KEY], self._client)
369+
return AsyncJob.from_json(response, self._client)

nucleus/job.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,39 @@
11
from dataclasses import dataclass
22
import time
33
from typing import Dict, List
4-
54
import requests
5+
from nucleus.constants import (
6+
JOB_CREATION_TIME_KEY,
7+
JOB_ID_KEY,
8+
JOB_LAST_KNOWN_STATUS_KEY,
9+
JOB_TYPE_KEY,
10+
STATUS_KEY,
11+
)
612

713
JOB_POLLING_INTERVAL = 5
814

915

1016
@dataclass
1117
class AsyncJob:
12-
id: str
18+
job_id: str
19+
job_last_known_status: str
20+
job_type: str
21+
job_creation_time: str
1322
client: "NucleusClient" # type: ignore # noqa: F821
1423

1524
def status(self) -> Dict[str, str]:
    """Fetch this job's current status from the API and cache it locally."""
    status_response = self.client.make_request(
        payload={},
        route=f"job/{self.job_id}",
        requests_command=requests.get,
    )
    # Keep the locally cached status field in sync with the server's answer.
    self.job_last_known_status = status_response[STATUS_KEY]
    return status_response
2132

2233
def errors(self) -> List[str]:
    """Return the error strings the API has recorded for this job."""
    error_route = f"job/{self.job_id}/errors"
    return self.client.make_request(
        payload={},
        route=error_route,
        requests_command=requests.get,
    )
2839

@@ -42,6 +53,16 @@ def sleep_until_complete(self, verbose_std_out=True):
4253
if final_status["status"] == "Errored":
4354
raise JobError(final_status, self)
4455

56+
@classmethod
def from_json(cls, payload: dict, client):
    """Alternate constructor: build an AsyncJob from an API response dict."""
    # Map each dataclass field to the payload key it is populated from.
    field_to_key = {
        "job_id": JOB_ID_KEY,
        "job_last_known_status": JOB_LAST_KNOWN_STATUS_KEY,
        "job_type": JOB_TYPE_KEY,
        "job_creation_time": JOB_CREATION_TIME_KEY,
    }
    kwargs = {field: payload[key] for field, key in field_to_key.items()}
    return cls(client=client, **kwargs)
65+
4566

4667
class JobError(Exception):
4768
def __init__(self, job_status: Dict[str, str], job: AsyncJob):

nucleus/model_run.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
ANNOTATIONS_KEY,
99
BOX_TYPE,
1010
DEFAULT_ANNOTATION_UPDATE_MODE,
11-
JOB_ID_KEY,
1211
POLYGON_TYPE,
1312
REQUEST_ID_KEY,
1413
SEGMENTATION_TYPE,
@@ -115,8 +114,7 @@ def predict(
115114
payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
116115
route=f"modelRun/{self.model_run_id}/predict?async=1",
117116
)
118-
119-
return AsyncJob(response[JOB_ID_KEY], self._client)
117+
return AsyncJob.from_json(response, self._client)
120118
else:
121119
return self._client.predict(self.model_run_id, annotations, update)
122120

nucleus/slice.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
from nucleus.dataset_item import DatasetItem
77
from nucleus.job import AsyncJob
88
from nucleus.utils import convert_export_payload, format_dataset_item_response
9-
from nucleus.constants import EXPORTED_ROWS
9+
from nucleus.constants import (
10+
EXPORTED_ROWS,
11+
)
1012

1113

1214
class Slice:
@@ -122,7 +124,7 @@ def send_to_labeling(self, project_id: str):
122124
response = self._client.make_request(
123125
{}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
124126
)
125-
return AsyncJob(response["job_id"], self._client)
127+
return AsyncJob.from_json(response, self._client)
126128

127129

128130
def check_annotations_are_in_slice(

tests/test_dataset.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -169,12 +169,12 @@ def test_dataset_append_async(dataset: Dataset):
169169
status = job.status()
170170
status["message"]["PayloadUrl"] = ""
171171
assert status == {
172-
"job_id": job.id,
172+
"job_id": job.job_id,
173173
"status": "Completed",
174174
"message": {
175175
"PayloadUrl": "",
176176
"image_upload_step": {"errored": 0, "pending": 0, "completed": 5},
177-
"started_image_processing": f"Dataset: {dataset.id}, Job: {job.id}",
177+
"started_image_processing": f"Dataset: {dataset.id}, Job: {job.job_id}",
178178
"ingest_to_reupload_queue": {
179179
"epoch": 1,
180180
"total": 5,
@@ -204,7 +204,7 @@ def test_dataset_append_async_with_1_bad_url(dataset: Dataset):
204204
status = job.status()
205205
status["message"]["PayloadUrl"] = ""
206206
assert status == {
207-
"job_id": f"{job.id}",
207+
"job_id": f"{job.job_id}",
208208
"status": "Errored",
209209
"message": {
210210
"PayloadUrl": "",
@@ -220,7 +220,7 @@ def test_dataset_append_async_with_1_bad_url(dataset: Dataset):
220220
"datasetId": f"{dataset.id}",
221221
"processed": 5,
222222
},
223-
"started_image_processing": f"Dataset: {dataset.id}, Job: {job.id}",
223+
"started_image_processing": f"Dataset: {dataset.id}, Job: {job.job_id}",
224224
},
225225
}
226226
# The error is fairly detailed and subject to change. What's important is we surface which URLs failed.
@@ -286,7 +286,7 @@ def test_annotate_async(dataset: Dataset):
286286
)
287287
job.sleep_until_complete()
288288
assert job.status() == {
289-
"job_id": job.id,
289+
"job_id": job.job_id,
290290
"status": "Completed",
291291
"message": {
292292
"annotation_upload": {
@@ -321,7 +321,7 @@ def test_annotate_async_with_error(dataset: Dataset):
321321
job.sleep_until_complete()
322322

323323
assert job.status() == {
324-
"job_id": job.id,
324+
"job_id": job.job_id,
325325
"status": "Completed",
326326
"message": {
327327
"annotation_upload": {

tests/test_indexing.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from nucleus.job import AsyncJob
12
import pytest
23

34
from .helpers import (
@@ -42,7 +43,11 @@ def test_index_integration(dataset):
4243
create_response = dataset.create_custom_index(
4344
[signed_embeddings_url], embedding_dim=3
4445
)
45-
assert JOB_ID_KEY in create_response
46+
job = AsyncJob.from_json(create_response, client="Nucleus Client")
47+
assert job.job_id
48+
assert job.job_last_known_status
49+
assert job.job_type
50+
assert job.job_creation_time
4651
assert MESSAGE_KEY in create_response
4752
job_id = create_response[JOB_ID_KEY]
4853

tests/test_jobs.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from pathlib import Path
2+
import time
3+
import pytest
4+
from nucleus import (
5+
AsyncJob,
6+
NucleusClient,
7+
)
8+
9+
10+
def test_reprs():
    """AsyncJob's repr should round-trip through eval back to an equal object."""

    # Have to define here in order to have access to all relevant objects
    # Bug fix: the annotation was `any`, which is the builtin function, not
    # a type; `object` is the correct "accept anything" annotation.
    def assert_repr_roundtrips(test_object: object) -> None:
        assert eval(str(test_object)) == test_object

    client = NucleusClient(api_key="fake_key")
    assert_repr_roundtrips(
        AsyncJob(
            client=client,
            job_id="fake_job_id",
            job_last_known_status="fake_job_status",
            job_type="fake_job_type",
            job_creation_time="fake_job_creation_time",
        )
    )
25+
26+
27+
def test_job_creation_and_listing(CLIENT):
    """Every job returned by list_jobs should round-trip through repr/eval."""
    for listed_job in CLIENT.list_jobs():
        assert eval(str(listed_job)) == listed_job

0 commit comments

Comments
 (0)