Skip to content

Commit c5162e3

Browse files
authored
refactor Slice methods to avoid fetching all items (#180)
* deprecate client.slice_info
* add slice.summary for shortform info (+test)
* add name and items, refactor info, fix tests
  - add name property
  - add items property
  - refactor old info as _fetch_all
  - refactor summary as new info
  - refactor tests relying on Slice.info
* lint
* fix broken Slice.info calls
* pylint
* mypy...
* isort...
* update version and changelog
1 parent c6fe4ae commit c5162e3

File tree

6 files changed

+134
-32
lines changed

6 files changed

+134
-32
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,29 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [0.4.2](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.4.2) - 2021-12-16
8+
9+
### Added
10+
- `Slice.name` property that fetches the Slice's user-defined name.
11+
- The Slice's items are no longer fetched unnecessarily; this used to cause considerable latency.
12+
- `Slice.items` property that fetches all items contained in the Slice.
13+
14+
### Changed
15+
- `Slice.info()` now only retrieves the Slice's `name`, `slice_id`, and `dataset_id`.
16+
- The Slice's items are no longer fetched unnecessarily; this used to cause considerable latency.
17+
- This method issues a warning to use `Slice.items` when attempting to access the `items` key.
18+
19+
### Deprecated
20+
- `NucleusClient.slice_info(..)` is deprecated in favor of `Slice.info()`.
21+
22+
## [0.4.1](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.4.1) - 2021-12-13
23+
24+
### Changed
25+
- Datasets in Nucleus now fall under two categories: scene or item.
26+
- Scene Datasets can only have scenes uploaded to them.
27+
- Item Datasets can only have items uploaded to them.
28+
- `NucleusClient.create_dataset` now requires a boolean parameter `is_scene` to immutably set whether the Dataset is a scene or item Dataset.
29+
730
## [0.4.0](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.4.0) - 2021-08-12
831

932
### Added

nucleus/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -741,8 +741,9 @@ def get_slice(self, slice_id: str) -> Slice:
741741
"""
742742
return Slice(slice_id, self)
743743

744+
@deprecated("Prefer calling Slice.info instead.")
744745
def slice_info(self, slice_id: str) -> dict:
745-
# TODO: migrate to Slice method and deprecate
746+
# TODO: deprecate in favor of Slice.info
746747
response = self.make_request(
747748
{},
748749
f"slice/{slice_id}",

nucleus/slice.py

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@
77
from nucleus.constants import EXPORTED_ROWS
88
from nucleus.dataset_item import DatasetItem
99
from nucleus.job import AsyncJob
10-
from nucleus.utils import convert_export_payload, format_dataset_item_response
10+
from nucleus.utils import (
11+
KeyErrorDict,
12+
convert_export_payload,
13+
format_dataset_item_response,
14+
)
1115

1216

1317
class Slice:
@@ -41,6 +45,7 @@ def __init__(self, slice_id: str, client):
4145
self.id = slice_id
4246
self._slice_id = slice_id
4347
self._client = client
48+
self._name = None
4449
self._dataset_id = None
4550

4651
def __repr__(self):
@@ -52,6 +57,30 @@ def __eq__(self, other):
5257
return True
5358
return False
5459

60+
def _fetch_all(self) -> dict:
61+
"""Retrieves info and all items of the Slice.
62+
63+
Returns:
64+
A dict mapping keys to the corresponding info retrieved.
65+
::
66+
67+
{
68+
"name": Union[str, int],
69+
"slice_id": str,
70+
"dataset_id": str,
71+
"dataset_items": List[{
72+
"id": str,
73+
"metadata": Dict[str, Union[str, int, float]],
74+
"ref_id": str,
75+
"original_image_url": str
76+
}]
77+
}
78+
"""
79+
response = self._client.make_request(
80+
{}, f"slice/{self.id}", requests_command=requests.get
81+
)
82+
return response
83+
5584
@property
5685
def slice_id(self):
5786
warnings.warn(
@@ -60,33 +89,45 @@ def slice_id(self):
6089
)
6190
return self._slice_id
6291

92+
@property
93+
def name(self):
94+
"""The name of the Slice."""
95+
if self._name is None:
96+
self._name = self.info()["name"]
97+
return self._name
98+
6399
@property
64100
def dataset_id(self):
65101
"""The ID of the Dataset to which the Slice belongs."""
66102
if self._dataset_id is None:
67-
self.info()
103+
self._dataset_id = self.info()["dataset_id"]
68104
return self._dataset_id
69105

106+
@property
107+
def items(self):
108+
"""All DatasetItems contained in the Slice."""
109+
return self._fetch_all()["dataset_items"]
110+
70111
def info(self) -> dict:
71-
"""Retrieves info and items of the Slice.
112+
"""Retrieves the name, slice_id, and dataset_id of the Slice.
72113
73114
Returns:
74115
A dict mapping keys to the corresponding info retrieved.
75116
::
76117
77118
{
78119
"name": Union[str, int],
120+
"slice_id": str,
79121
"dataset_id": str,
80-
"dataset_items": List[{
81-
"id": str,
82-
"metadata": Dict[str, Union[str, int, float]],
83-
"ref_id": str,
84-
"original_image_url": str
85-
}]
86122
}
87123
"""
88-
info = self._client.slice_info(self.id)
89-
self._dataset_id = info["dataset_id"]
124+
info = KeyErrorDict(
125+
items="The 'items' key is now deprecated for Slice.info. Use Slice.items instead."
126+
)
127+
res = self._client.make_request(
128+
{}, f"slice/{self.id}/info", requests_command=requests.get
129+
)
130+
info.update(res)
90131
return info
91132

92133
def append(
@@ -137,11 +178,10 @@ def items_and_annotation_generator(
137178
}
138179
}]
139180
"""
140-
info = self.info()
141-
for item_metadata in info["dataset_items"]:
181+
for item_metadata in self.items:
142182
yield format_dataset_item_response(
143183
self._client.dataitem_loc(
144-
dataset_id=info["dataset_id"],
184+
dataset_id=self.dataset_id,
145185
dataset_item_id=item_metadata["id"],
146186
)
147187
)
@@ -259,14 +299,12 @@ def check_annotations_are_in_slice(
259299
1. True if all Annotations are in the Slice, False otherwise;
260300
2. List of reference IDs not in the Slice.
261301
"""
262-
info = slice_to_check.info()
263-
264302
reference_ids_not_found_in_slice = {
265303
annotation.reference_id
266304
for annotation in annotations
267305
if annotation.reference_id is not None
268306
}.difference(
269-
{item_metadata["ref_id"] for item_metadata in info["dataset_items"]}
307+
{item_metadata["ref_id"] for item_metadata in slice_to_check.items}
270308
)
271309
if reference_ids_not_found_in_slice:
272310
annotations_are_in_slice = False

nucleus/utils.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,40 @@
4848
}
4949

5050

51+
class KeyErrorDict(dict):
    """Wrapper for response dicts with deprecated keys.

    Subscripting (``d[key]``) with a deprecated key that is not present in
    the dict raises a ``KeyError`` whose argument is the message registered
    for that key, rather than the bare key. Any other missing key raises the
    usual ``KeyError(key)``. Only subscript access is affected: ``dict.get``
    and ``in`` never invoke ``__missing__``.

    Parameters:
        **kwargs: Mapping from the deprecated key to a warning message.

    Raises:
        TypeError: If a key or a warning message is not a string.
    """

    def __init__(self, **kwargs):
        self._deprecated = {}

        for key, msg in kwargs.items():
            # Defensive check; keyword-argument names are normally strings.
            if not isinstance(key, str):
                raise TypeError(
                    f"All keys must be strings! Received non-string '{key}'"
                )
            if not isinstance(msg, str):
                raise TypeError(
                    f"All warning messages must be strings! Received non-string '{msg}'"
                )

            self._deprecated[key] = msg

        # The deprecated keys are tracked separately; the dict itself starts
        # empty and is populated later by the caller (e.g. via ``update``).
        super().__init__()

    def __missing__(self, key):
        """Raises KeyError with the deprecation message for deprecated keys,
        otherwise a plain KeyError for the missing key."""
        if key in self._deprecated:
            raise KeyError(self._deprecated[key])
        # ``dict`` defines no ``__missing__`` to delegate to, so raise the
        # standard error directly instead of chaining it from an unrelated
        # AttributeError.
        raise KeyError(key)
83+
84+
5185
def format_prediction_response(
5286
response: dict,
5387
) -> Union[

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.4.1"
24+
version = "0.4.2"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/test_slice.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -64,16 +64,22 @@ def test_slice_create_and_delete_and_list(dataset):
6464
assert len(dataset_slices) == 1
6565
assert slc.id == dataset_slices[0]
6666

67-
response = slc.info()
68-
assert response["name"] == TEST_SLICE_NAME
69-
assert response["dataset_id"] == dataset.id
70-
assert len(response["dataset_items"]) == 2
67+
assert slc.name == TEST_SLICE_NAME
68+
assert slc.dataset_id == dataset.id
69+
70+
items = slc.items
71+
assert len(items) == 2
7172
for item in ds_items[:2]:
7273
assert (
73-
item.reference_id == response["dataset_items"][0]["ref_id"]
74-
or item.reference_id == response["dataset_items"][1]["ref_id"]
74+
item.reference_id == items[0]["ref_id"]
75+
or item.reference_id == items[1]["ref_id"]
7576
)
7677

78+
response = slc.info()
79+
assert response["name"] == TEST_SLICE_NAME
80+
assert response["slice_id"] == slc.slice_id
81+
assert response["dataset_id"] == dataset.id
82+
7783

7884
def test_slice_create_and_export(dataset):
7985
# Dataset upload
@@ -132,13 +138,13 @@ def test_slice_append(dataset):
132138
# Insert duplicate first item
133139
slc.append(reference_ids=[item.reference_id for item in ds_items[:3]])
134140

135-
response = slc.info()
136-
assert len(response["dataset_items"]) == 3
141+
items = slc.items
142+
assert len(items) == 3
137143
for item in ds_items[:3]:
138144
assert (
139-
item.reference_id == response["dataset_items"][0]["ref_id"]
140-
or item.reference_id == response["dataset_items"][1]["ref_id"]
141-
or item.reference_id == response["dataset_items"][2]["ref_id"]
145+
item.reference_id == items[0]["ref_id"]
146+
or item.reference_id == items[1]["ref_id"]
147+
or item.reference_id == items[2]["ref_id"]
142148
)
143149

144150
all_stored_items = [_[ITEM_KEY] for _ in slc.items_and_annotations()]
@@ -178,8 +184,8 @@ def test_slice_send_to_labeling(dataset):
178184
reference_ids=[ds_items[0].reference_id, ds_items[1].reference_id],
179185
)
180186

181-
response = slc.info()
182-
assert len(response["dataset_items"]) == 2
187+
items = slc.items
188+
assert len(items) == 2
183189

184190
response = slc.send_to_labeling(TEST_PROJECT_ID)
185191
assert isinstance(response, AsyncJob)

0 commit comments

Comments
 (0)