1
+ """Slices are subsets of your Dataset that unlock curation and exploration workflows.
2
+
3
+ Instead of thinking of your Datasets as collections of data, it is useful to think
4
+ about them as a collection of Slices. For instance, your dataset may contain
5
+ different weather scenarios, traffic conditions, or highway types.
6
+
7
+ Perhaps your Models perform poorly on foggy weather scenarios; it is then useful
8
+ to slice your dataset into a "foggy" slice, and fine-tune model performance on
9
+ this slice until it reaches the performance you desire.
10
+ """
1
11
from typing import Dict , Iterable , List , Set , Tuple , Union
2
12
3
13
import requests
12
22
13
23
14
24
class Slice :
15
- """
16
- Slice respesents a subset of your Dataset.
17
- """
25
+ """A Slice represents a subset of DatasetItems in your Dataset."""
18
26
19
27
def __init__ (self , slice_id : str , client ):
20
28
self .slice_id = slice_id
@@ -32,21 +40,41 @@ def __eq__(self, other):
32
40
33
41
@property
34
42
def dataset_id (self ):
35
- """The id of the dataset this slice belongs to ."""
43
+ """The ID of the Dataset to which the Slice belongs ."""
36
44
if self ._dataset_id is None :
37
45
self .info ()
38
46
return self ._dataset_id
39
47
40
48
def info (self ) -> dict :
41
- """
42
- This endpoint provides information about specified slice.
43
-
44
- :return:
45
- {
46
- "name": str,
47
- "dataset_id": str,
48
- "dataset_items",
49
- }
49
+ """Retrieves info and items of the Slice. ::
50
+
51
+ import nucleus
52
+ client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
53
+ slice = client.get_slice("slc_bx86ea222a6g057x4380")
54
+
55
+ slice.info()
56
+
57
+ Returns:
58
+ A dict mapping keys to the corresponding info retrieved. ::
59
+
60
+ {
61
+ "name": "foggy",
62
+ "dataset_id": "ds_bw6de8s84pe0vbn6p5zg",
63
+ "dataset_items": [
64
+ {
65
+ "id": "di_bx79jc134x5w2janra10",
66
+ "metadata": {},
67
+ "ref_id": "image_ref_300000",
68
+ "original_image_url": "s3://bucket-and-key"
69
+ },
70
+ {
71
+ "id": "di_5x79jc134x5w2jantr30",
72
+ "metadata": {},
73
+ "ref_id": "image_ref_300001",
74
+ "original_image_url": "s3://bucket-and-key"
75
+ },
76
+ ],
77
+ }
50
78
"""
51
79
info = self ._client .slice_info (self .slice_id )
52
80
self ._dataset_id = info ["dataset_id" ]
@@ -56,18 +84,27 @@ def append(
56
84
self ,
57
85
reference_ids : List [str ] = None ,
58
86
) -> dict :
59
- """
60
- Appends to a slice from items already present in a dataset.
61
- The caller must exclusively use either datasetItemIds or reference_ids
62
- as a means of identifying items in the dataset.
87
+ """Appends existing DatasetItems from a Dataset to a Slice.
88
+
89
+ The endpoint expects a list of DatasetItem reference IDs which are set
90
+ at upload time. ::
63
91
64
- :param
65
- reference_ids: List[str],
92
+ import nucleus
93
+ client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
94
+ slice = client.get_slice("slc_bx86ea222a6g057x4380")
66
95
67
- :return:
68
- {
69
- "slice_id": str,
70
- }
96
+ # You can append to a slice from existing reference_ids
97
+ slice.append(reference_ids=["image_300000", "image_300001"])
98
+
99
+ Args:
100
+ reference_ids:
101
+ A list of user-specified IDs for DatasetItems you wish to append.
102
+
103
+ Returns:
104
+ A dict of the slice_id and the newly appended DatasetItem IDs. ::
105
+
106
+ {"slice_id": "slc_bx86ea222a6g057x4380",
107
+ "new_items": ["di_bx79jc1z4x5wvjenra10", "di_bx79ha5z4x5wvjenr9y0"]}
71
108
"""
72
109
response = self ._client .append_to_slice (
73
110
slice_id = self .slice_id ,
@@ -78,15 +115,28 @@ def append(
78
115
def items_and_annotation_generator (
79
116
self ,
80
117
) -> Iterable [Dict [str , Union [DatasetItem , Dict [str , List [Annotation ]]]]]:
81
- """Returns an iterable of all DatasetItems and Annotations in this slice.
118
+ """Provides a generator of all DatasetItems and Annotations in the slice. ::
119
+
120
+ import nucleus
121
+ client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
122
+ slice = client.get_slice("slc_bx86ea222a6g057x4380")
123
+
124
+ slice.items_and_annotation_generator()
82
125
83
126
Returns:
84
- An iterable, where each item is a dict with two keys representing a row
85
- in the dataset.
86
- * One value in the dict is the DatasetItem, containing a reference to the
87
- item that was annotated, for example an image_url.
88
- * The other value is a dictionary containing all the annotations for this
89
- dataset item, sorted by annotation type.
127
+ A generator where each element is a dict containing the DatasetItem
128
+ and all of its associated Annotations, grouped by type. ::
129
+
130
+ Iterable([
131
+ {"item": DatasetItem(image_location="s3://bucket-and-key",
132
+ reference_id="image_ref_300000",
133
+ metadata={},
134
+ pointcloud_location=None,
135
+ upload_to_scale=True),
136
+ "annotations": {"box": [BoxAnnotation ... ],
137
+ "segmentation": [SegmentationAnnotation ... ]}},
138
+ ...
139
+ ])
90
140
"""
91
141
info = self .info ()
92
142
for item_metadata in info ["dataset_items" ]:
@@ -100,15 +150,28 @@ def items_and_annotation_generator(
100
150
def items_and_annotations (
101
151
self ,
102
152
) -> List [Dict [str , Union [DatasetItem , Dict [str , List [Annotation ]]]]]:
103
- """Returns a list of all DatasetItems and Annotations in this slice.
153
+ """Provides a list of all DatasetItems and Annotations in the Slice. ::
154
+
155
+ import nucleus
156
+ client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
157
+ slice = client.get_slice("slc_bx86ea222a6g057x4380")
158
+
159
+ slice.items_and_annotations()
104
160
105
161
Returns:
106
- A list, where each item is a dict with two keys representing a row
107
- in the dataset.
108
- * One value in the dict is the DatasetItem, containing a reference to the
109
- item that was annotated.
110
- * The other value is a dictionary containing all the annotations for this
111
- dataset item, sorted by annotation type.
162
+ A list where each element is a dict containing the DatasetItem
163
+ and all of its associated Annotations, grouped by type (e.g. box). ::
164
+
165
+ [
166
+ {"item": DatasetItem(image_location="s3://bucket-and-key",
167
+ reference_id="image_ref_300000",
168
+ metadata={},
169
+ pointcloud_location=None,
170
+ upload_to_scale=True),
171
+ "annotations": {"box": [BoxAnnotation ... ],
172
+ "segmentation": [SegmentationAnnotation ... ]}},
173
+ ...
174
+ ]
112
175
"""
113
176
api_payload = self ._client .make_request (
114
177
payload = None ,
@@ -118,6 +181,27 @@ def items_and_annotations(
118
181
return convert_export_payload (api_payload [EXPORTED_ROWS ])
119
182
120
183
def send_to_labeling (self , project_id : str ):
184
+ """Send items in the Slice as tasks to a Scale labeling project.
185
+
186
+ This endpoint submits the items of the Slice as tasks to a pre-existing Scale Annotation project uniquely identified by project_id. Only projects of type General Image Annotation are currently supported. Additionally, in order for task submission to succeed, the project must have task instructions and geometries configured as project-level parameters. In order to create a project or set project parameters, you must use the Scale Annotation API, which is documented here: `Scale Annotation API Documentation <https://docs.scale.com/reference/project-overview>`_. When the newly created annotation tasks are annotated, the annotations will be automatically reflected in the Nucleus platform.
187
+
188
+ For self-serve projects, users can choose to submit the slice as a calibration batch, which is recommended for brand new labeling projects. For more information about calibration batches, please reference `Overview of Self Serve Workflow <https://docs.scale.com/reference/batch-overview>`_. Note: A batch can be either a calibration batch or a self label batch, but not both.
189
+
190
+ Note: Nucleus only supports bounding box, polygon, and line annotations. If the project parameters specify any other geometries (ellipses or points), those objects will be annotated, but they will not be reflected in Nucleus. ::
191
+
192
+ import nucleus
193
+ client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
194
+ slice = client.get_slice("slc_bx86ea222a6g057x4380")
195
+
196
+ project_id = "2408bfb36443d50025f41bbd"
197
+ job = slice.send_to_labeling(project_id)
198
+ job.sleep_until_complete() # block until async job complete
199
+
200
+ Args:
201
+ project_id: A unique id of the target annotation project.
202
+ calibration_batch: (Relevant to Scale Rapid projects only) An optional boolean signaling whether to send as a "calibration batch" for taskers to preliminarily evaluate your project instructions and parameters.
203
+ self_label_batch: (Relevant to Scale Rapid projects only) An optional boolean signaling whether to send as a "self-label batch," in which your team can label internally through Scale Rapid.
204
+ """
121
205
response = self ._client .make_request (
122
206
{}, f"slice/{ self .slice_id } /{ project_id } /send_to_labeling"
123
207
)
@@ -126,13 +210,22 @@ def send_to_labeling(self, project_id: str):
126
210
def export_embeddings (
127
211
self ,
128
212
) -> List [Dict [str , Union [str , List [float ]]]]:
129
- """Returns a pd.Dataframe-ready format of dataset embeddings.
213
+ """Provides a pd.DataFrame-like list of dataset embeddings. ::
214
+
215
+ import nucleus
216
+ client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
217
+ slice = client.get_slice("slc_bx86ea222a6g057x4380")
218
+
219
+ slice.export_embeddings()
130
220
131
221
Returns:
132
- A list, where each item is a dict with two keys representing a row
133
- in the dataset.
134
- * One value in the dict is the reference id
135
- * The other value is a list of the embedding values
222
+ A list where each element is a columnar mapping ::
223
+
224
+ [
225
+ {"embedding_vector": [-0.0022, 0.0457, ... ],
226
+ "reference_id": "image_ref_300000"},
227
+ ...
228
+ ]
136
229
"""
137
230
api_payload = self ._client .make_request (
138
231
payload = None ,
@@ -145,17 +238,20 @@ def export_embeddings(
145
238
def check_annotations_are_in_slice (
146
239
annotations : List [Annotation ], slice_to_check : Slice
147
240
) -> Tuple [bool , Set [str ]]:
148
- """Check membership of the annotation targets within this slice .
241
+ """Checks whether the supplied Annotation objects exist in the supplied Slice .
149
242
150
- annotations: Annnotations with ids referring to targets.
151
- slice: The slice to check against .
243
+ This endpoint checks whether each Annotation object's reference ID (of the
244
+ parent DatasetItem) exists in the Slice .
152
245
246
+ Args:
247
+ annotations: Annotations with ids referring to targets.
248
+ slice_to_check: The slice to check against.
153
249
154
250
Returns:
155
- A tuple, where the first element is true/false whether the annotations are all
156
- in the slice.
157
- The second element is the list of item_ids not in the slice.
158
- The third element is the list of ref_ids not in the slice .
251
+ A tuple of two elements.
252
+
253
+ #. True if all Annotations are in the Slice, False otherwise;
254
+ #. List of reference IDs not in the Slice .
159
255
"""
160
256
info = slice_to_check .info ()
161
257
0 commit comments