Skip to content

Commit 90e47b7

Browse files
author
Diego Ardila
committed
passing integration test
1 parent 5834651 commit 90e47b7

15 files changed

+248
-108
lines changed

nucleus/__init__.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
# pylint: disable=C0302
7171
from requests.packages.urllib3.util.retry import Retry
7272

73-
from .constants import REFERENCE_IDS_KEY, DATASET_ITEM_IDS_KEY
73+
from .constants import REFERENCE_IDS_KEY, DATASET_ITEM_IDS_KEY, UPDATE_KEY
7474
from .dataset import Dataset
7575
from .dataset_item import DatasetItem
7676
from .annotation import (
@@ -123,7 +123,6 @@
123123
AUTOTAGS_KEY,
124124
ANNOTATION_METADATA_SCHEMA_KEY,
125125
ITEM_METADATA_SCHEMA_KEY,
126-
FORCE_KEY,
127126
EMBEDDINGS_URL_KEY,
128127
)
129128
from .model import Model
@@ -151,11 +150,14 @@ def __init__(
151150
self,
152151
api_key: str,
153152
use_notebook: bool = False,
154-
endpoint=NUCLEUS_ENDPOINT,
153+
endpoint: str = None,
155154
):
156155
self.api_key = api_key
157156
self.tqdm_bar = tqdm.tqdm
158-
self.endpoint = endpoint
157+
if endpoint is None:
158+
self.endpoint = os.environ.get(
159+
"NUCLEUS_ENDPOINT", NUCLEUS_ENDPOINT
160+
)
159161
self._use_notebook = use_notebook
160162
if use_notebook:
161163
self.tqdm_bar = tqdm_notebook.tqdm
@@ -497,7 +499,7 @@ def exception_handler(request, exception):
497499
items = payload[ITEMS_KEY]
498500
payloads = [
499501
# batch_size images per request
500-
{ITEMS_KEY: items[i : i + batch_size], FORCE_KEY: update}
502+
{ITEMS_KEY: items[i : i + batch_size], UPDATE_KEY: update}
501503
for i in range(0, len(items), batch_size)
502504
]
503505

nucleus/constants.py

Lines changed: 54 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,65 @@
1-
NUCLEUS_ENDPOINT = "https://api.scale.com/v1/nucleus"
2-
DEFAULT_NETWORK_TIMEOUT_SEC = 120
3-
ITEMS_KEY = "items"
4-
ITEM_KEY = "item"
5-
REFERENCE_ID_KEY = "reference_id"
6-
REFERENCE_IDS_KEY = "reference_ids"
7-
DATASET_ID_KEY = "dataset_id"
8-
IMAGE_KEY = "image"
9-
IMAGE_URL_KEY = "image_url"
10-
NEW_ITEMS = "new_items"
11-
UPDATED_ITEMS = "updated_items"
12-
IGNORED_ITEMS = "ignored_items"
13-
ERROR_ITEMS = "upload_errors"
14-
ERROR_PAYLOAD = "error_payload"
15-
ERROR_CODES = "error_codes"
1+
ANNOTATIONS_IGNORED_KEY = "annotations_ignored"
162
ANNOTATIONS_KEY = "annotations"
17-
ANNOTATION_ID_KEY = "annotation_id"
183
ANNOTATIONS_PROCESSED_KEY = "annotations_processed"
19-
ANNOTATIONS_IGNORED_KEY = "annotations_ignored"
20-
PREDICTIONS_PROCESSED_KEY = "predictions_processed"
21-
PREDICTIONS_IGNORED_KEY = "predictions_ignored"
4+
ANNOTATION_ID_KEY = "annotation_id"
5+
ANNOTATION_METADATA_SCHEMA_KEY = "annotation_metadata_schema"
6+
BOX_TYPE = "box"
7+
POLYGON_TYPE = "polygon"
8+
SEGMENTATION_TYPE = "segmentation"
9+
ANNOTATION_TYPES = (BOX_TYPE, POLYGON_TYPE, SEGMENTATION_TYPE)
2210
ANNOTATION_UPDATE_KEY = "update"
23-
DEFAULT_ANNOTATION_UPDATE_MODE = False
24-
STATUS_CODE_KEY = "status_code"
25-
STATUS_KEY = "status"
26-
SUCCESS_STATUS_CODES = [200, 201, 202]
27-
ERRORS_KEY = "errors"
28-
MODEL_RUN_ID_KEY = "model_run_id"
29-
MODEL_ID_KEY = "model_id"
30-
DATASET_ITEM_ID_KEY = "dataset_item_id"
31-
ITEM_ID_KEY = "item_id"
11+
AUTOTAGS_KEY = "autotags"
12+
13+
CONFIDENCE_KEY = "confidence"
14+
DATASET_ID_KEY = "dataset_id"
3215
DATASET_ITEM_IDS_KEY = "dataset_item_ids"
33-
SLICE_ID_KEY = "slice_id"
34-
DATASET_NAME_KEY = "name"
16+
DATASET_ITEM_ID_KEY = "dataset_item_id"
17+
DATASET_LENGTH_KEY = "length"
3518
DATASET_MODEL_RUNS_KEY = "model_run_ids"
19+
DATASET_NAME_KEY = "name"
3620
DATASET_SLICES_KEY = "slice_ids"
37-
DATASET_LENGTH_KEY = "length"
38-
FORCE_KEY = "update"
21+
DEFAULT_ANNOTATION_UPDATE_MODE = False
22+
DEFAULT_NETWORK_TIMEOUT_SEC = 120
23+
EMBEDDINGS_URL_KEY = "embeddings_url"
24+
ERRORS_KEY = "errors"
25+
ERROR_CODES = "error_codes"
26+
ERROR_ITEMS = "upload_errors"
27+
ERROR_PAYLOAD = "error_payload"
28+
GEOMETRY_KEY = "geometry"
29+
HEIGHT_KEY = "height"
30+
IGNORED_ITEMS = "ignored_items"
31+
IMAGE_KEY = "image"
32+
IMAGE_URL_KEY = "image_url"
33+
INDEX_KEY = "index"
34+
ITEMS_KEY = "items"
35+
ITEM_ID_KEY = "item_id"
36+
ITEM_KEY = "item"
37+
ITEM_METADATA_SCHEMA_KEY = "item_metadata_schema"
38+
JOB_ID_KEY = "job_id"
39+
LABEL_KEY = "label"
40+
MASK_URL_KEY = "mask_url"
41+
MESSAGE_KEY = "message"
3942
METADATA_KEY = "metadata"
43+
MODEL_ID_KEY = "model_id"
44+
MODEL_RUN_ID_KEY = "model_run_id"
4045
NAME_KEY = "name"
41-
LABEL_KEY = "label"
42-
CONFIDENCE_KEY = "confidence"
46+
NEW_ITEMS = "new_items"
47+
NUCLEUS_ENDPOINT = "https://api.scale.com/v1/nucleus"
4348
ORIGINAL_IMAGE_URL_KEY = "original_image_url"
44-
X_KEY = "x"
45-
Y_KEY = "y"
46-
WIDTH_KEY = "width"
47-
HEIGHT_KEY = "height"
49+
PREDICTIONS_IGNORED_KEY = "predictions_ignored"
50+
PREDICTIONS_PROCESSED_KEY = "predictions_processed"
51+
REFERENCE_IDS_KEY = "reference_ids"
52+
REFERENCE_ID_KEY = "reference_id"
53+
REQUEST_ID_KEY = "requestId"
54+
SEGMENTATIONS_KEY = "segmentations"
55+
SLICE_ID_KEY = "slice_id"
56+
STATUS_CODE_KEY = "status_code"
57+
STATUS_KEY = "status"
58+
SUCCESS_STATUS_CODES = [200, 201, 202]
4859
TYPE_KEY = "type"
60+
UPDATED_ITEMS = "updated_items"
61+
UPDATE_KEY = "update"
4962
VERTICES_KEY = "vertices"
50-
BOX_TYPE = "box"
51-
POLYGON_TYPE = "polygon"
52-
SEGMENTATION_TYPE = "segmentation"
53-
ANNOTATION_TYPES = (BOX_TYPE, POLYGON_TYPE, SEGMENTATION_TYPE)
54-
GEOMETRY_KEY = "geometry"
55-
AUTOTAGS_KEY = "autotags"
56-
ANNOTATION_METADATA_SCHEMA_KEY = "annotation_metadata_schema"
57-
ITEM_METADATA_SCHEMA_KEY = "item_metadata_schema"
58-
MASK_URL_KEY = "mask_url"
59-
INDEX_KEY = "index"
60-
SEGMENTATIONS_KEY = "segmentations"
61-
EMBEDDINGS_URL_KEY = "embeddings_url"
62-
JOB_ID_KEY = "job_id"
63-
MESSAGE_KEY = "message"
63+
WIDTH_KEY = "width"
64+
X_KEY = "x"
65+
Y_KEY = "y"

nucleus/dataset.py

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
1-
from typing import Any, Dict, List, Optional
1+
import uuid
2+
from typing import Any, Dict, List, Optional, Union
23

34
import requests
45

5-
from nucleus.utils import format_dataset_item_response
6+
from nucleus.job import AsyncJob
7+
from nucleus.utils import (
8+
format_dataset_item_response,
9+
serialize_and_write_to_presigned_url,
10+
)
611

712
from .annotation import Annotation
813
from .constants import (
@@ -14,8 +19,10 @@
1419
DEFAULT_ANNOTATION_UPDATE_MODE,
1520
NAME_KEY,
1621
REFERENCE_IDS_KEY,
22+
REQUEST_ID_KEY,
23+
UPDATE_KEY,
1724
)
18-
from .dataset_item import DatasetItem
25+
from .dataset_item import DatasetItem, check_all_paths_remote
1926
from .payload_constructor import construct_model_run_creation_payload
2027

2128

@@ -26,7 +33,11 @@ class Dataset:
2633
compare model performance on you data.
2734
"""
2835

29-
def __init__(self, dataset_id: str, client):
36+
def __init__(
37+
self,
38+
dataset_id: str,
39+
client: "NucleusClient", # type:ignore # noqa: F821
40+
):
3041
self.id = dataset_id
3142
self._client = client
3243

@@ -160,16 +171,18 @@ def ingest_tasks(self, task_ids: dict):
160171
def append(
161172
self,
162173
dataset_items: List[DatasetItem],
163-
force: Optional[bool] = False,
174+
update: Optional[bool] = False,
164175
batch_size: Optional[int] = 20,
165-
) -> dict:
176+
asynchronous=False,
177+
) -> Union[dict, AsyncJob]:
166178
"""
167179
Appends images with metadata (dataset items) to the dataset. Overwrites images and metadata on collision if `update` is True.
168180
169181
Parameters:
170182
:param dataset_items: items to upload
171-
:param force: if True overwrites images on collision
183+
:param update: if True overwrites images and metadata on collision
172184
:param batch_size: batch parameter for long uploads
185+
:param asynchronous: if True, return a job object representing the asynchronous ingestion job.
173186
:return:
174187
{
175188
'dataset_id': str,
@@ -178,10 +191,29 @@ def append(
178191
'ignored_items': int,
179192
}
180193
"""
194+
if asynchronous:
195+
check_all_paths_remote(dataset_items)
196+
request_id = uuid.uuid4().hex
197+
response = self._client.make_request(
198+
payload={},
199+
route=f"dataset/{self.id}/signedUrl/{request_id}",
200+
requests_command=requests.get,
201+
)
202+
print(response["signed_url"])
203+
204+
serialize_and_write_to_presigned_url(
205+
dataset_items, response["signed_url"]
206+
)
207+
response = self._client.make_request(
208+
payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
209+
route=f"dataset/{self.id}/append?async=1",
210+
)
211+
return AsyncJob(response["job_id"], self._client)
212+
181213
return self._client.populate_dataset(
182214
self.id,
183215
dataset_items,
184-
force=force,
216+
force=update,
185217
batch_size=batch_size,
186218
)
187219

nucleus/dataset_item.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import json
22
import os.path
33
from dataclasses import dataclass
4-
from typing import Optional
4+
from typing import List, Optional
55

66
from .constants import (
77
DATASET_ITEM_ID_KEY,
@@ -22,7 +22,7 @@ class DatasetItem:
2222

2323
def __post_init__(self):
2424
self.image_url = self.image_location
25-
self.local = self._is_local_path(self.image_location)
25+
self.local = is_local_path(self.image_location)
2626

2727
@classmethod
2828
def from_json(cls, payload: dict):
@@ -36,10 +36,6 @@ def from_json(cls, payload: dict):
3636
metadata=payload.get(METADATA_KEY, {}),
3737
)
3838

39-
def _is_local_path(self, path: str) -> bool:
40-
path_components = [comp.lower() for comp in path.split("/")]
41-
return path_components[0] not in {"https:", "http:", "s3:", "gs:"}
42-
4339
def local_file_exists(self):
4440
return os.path.isfile(self.image_url)
4541

@@ -56,3 +52,17 @@ def to_payload(self) -> dict:
5652

5753
def to_json(self) -> str:
5854
return json.dumps(self.to_payload())
55+
56+
57+
def is_local_path(path: str) -> bool:
    """Return True when *path* does not begin with a supported remote URL scheme.

    Anything whose first "/"-separated component (case-insensitive) is not one
    of "https:", "http:", "s3:", or "gs:" is treated as a local path.
    """
    scheme = path.split("/", 1)[0].lower()
    return scheme not in {"https:", "http:", "s3:", "gs:"}
60+
61+
62+
def check_all_paths_remote(dataset_items: List[DatasetItem]):
    """Raise ValueError if any item's image_location is not a supported remote URL.

    Used before asynchronous ingestion, which cannot upload local files.
    """
    offending = next(
        (
            item.image_location
            for item in dataset_items
            if is_local_path(item.image_location)
        ),
        None,
    )
    if offending is not None:
        raise ValueError(
            f"All paths must be remote, but {offending} is either "
            "local, or a remote URL type that is not supported."
        )

nucleus/job.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from dataclasses import dataclass
2+
import time
3+
from typing import Dict, List
4+
5+
import requests
6+
7+
8+
@dataclass
class AsyncJob:
    """Handle for a server-side asynchronous job, polled via the client's HTTP API."""

    id: str
    client: "NucleusClient"  # type: ignore # noqa: F821

    def status(self) -> Dict[str, str]:
        """Fetch the job's current status payload from the server."""
        return self.client.make_request(
            payload={},
            route=f"job/{self.id}",
            requests_command=requests.get,
        )

    def errors(self) -> List[str]:
        """Fetch the list of error messages recorded for this job."""
        # Bug fix: the route was f"job/${self.id}/errors" — the "$" is a
        # JavaScript template-literal leftover and produced a malformed URL
        # such as "job/$abc123/errors".
        return self.client.make_request(
            payload={},
            route=f"job/{self.id}/errors",
            requests_command=requests.get,
        )

    def sleep_until_complete(self, verbose_std_out=True):
        """Block, polling once per second, until the job stops running.

        Parameters:
            verbose_std_out: if True, print each in-progress status payload.

        Raises:
            JobError: if the job's final status is "Errored".
        """
        while True:
            time.sleep(1)
            status = self.status()
            if status["status"] == "Running":
                if verbose_std_out:
                    print(status)
                continue
            break
        final_status = status
        if final_status["status"] == "Errored":
            raise JobError(final_status, self)


class JobError(Exception):
    """Raised when an AsyncJob finishes with an "Errored" final status."""

    def __init__(self, job_status: Dict[str, str], job: AsyncJob):
        final_status_message = job_status["message"]
        final_status = job_status["status"]
        message = (
            f"The job reported a final status of {final_status} "
            "This could, however, mean a partial success with some successes and some failures. "
            f"The final status message was: {final_status_message} \n"
            f"For more detailed error messages you can call {str(job)}.errors()"
        )
        super().__init__(message)

nucleus/payload_constructor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
REFERENCE_ID_KEY,
1818
ANNOTATIONS_KEY,
1919
ITEMS_KEY,
20-
FORCE_KEY,
20+
UPDATE_KEY,
2121
MODEL_ID_KEY,
2222
ANNOTATION_METADATA_SCHEMA_KEY,
2323
SEGMENTATIONS_KEY,
@@ -34,7 +34,7 @@ def construct_append_payload(
3434
return (
3535
{ITEMS_KEY: items}
3636
if not force
37-
else {ITEMS_KEY: items, FORCE_KEY: True}
37+
else {ITEMS_KEY: items, UPDATE_KEY: True}
3838
)
3939

4040

0 commit comments

Comments
 (0)