1
- from typing import List
1
+ from typing import Dict , List , Iterable , Set , Tuple , Optional , Union
2
+ from nucleus .dataset_item import DatasetItem
3
+ from nucleus .annotation import Annotation
4
+ from nucleus .utils import format_dataset_item_response
5
+
6
+ from .constants import DEFAULT_ANNOTATION_UPDATE_MODE
2
7
3
8
4
9
class Slice :
@@ -9,6 +14,7 @@ class Slice:
9
14
def __init__ (self , slice_id : str , client ):
10
15
self .slice_id = slice_id
11
16
self ._client = client
17
+ self ._dataset_id = None
12
18
13
19
def __repr__ (self ):
14
20
return f"Slice(slice_id='{ self .slice_id } ', client={ self ._client } )"
@@ -19,6 +25,13 @@ def __eq__(self, other):
19
25
return True
20
26
return False
21
27
28
+ @property
29
+ def dataset_id (self ):
30
+ """The id of the dataset this slice belongs to."""
31
+ if self ._dataset_id is None :
32
+ self .info ()
33
+ return self ._dataset_id
34
+
22
35
def info (self ) -> dict :
23
36
"""
24
37
This endpoint provides information about specified slice.
@@ -30,7 +43,9 @@ def info(self) -> dict:
30
43
"dataset_items",
31
44
}
32
45
"""
33
- return self ._client .slice_info (self .slice_id )
46
+ info = self ._client .slice_info (self .slice_id )
47
+ self ._dataset_id = info ["dataset_id" ]
48
+ return info
34
49
35
50
def append (
36
51
self ,
@@ -57,3 +72,118 @@ def append(
57
72
reference_ids = reference_ids ,
58
73
)
59
74
return response
75
+
76
def items_and_annotation_generator(
    self,
) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
    """Lazily yield every DatasetItem in this slice with its annotations.

    The slice info is fetched once when iteration begins; after that each
    dataset item is requested from the client individually, one per
    iteration step.

    Yields:
        One dict per dataset item, with two keys representing a row in
        the dataset:
        * One value is the DatasetItem, containing a reference to the
          item that was annotated, for example an image_url.
        * The other value is a dictionary holding all the annotations
          for that dataset item, grouped by annotation type.
    """
    slice_info = self.info()
    for entry in slice_info["dataset_items"]:
        raw_item = self._client.dataitem_loc(
            dataset_id=slice_info["dataset_id"],
            dataset_item_id=entry["id"],
        )
        yield format_dataset_item_response(raw_item)
97
+
98
+ def items_and_annotations (
99
+ self ,
100
+ ) -> List [Dict [str , Union [DatasetItem , Dict [str , List [Annotation ]]]]]:
101
+ """Returns a list of all DatasetItems and Annotations in this slice.
102
+
103
+ Returns:
104
+ A list, where each item is a dict with two keys representing a row
105
+ in the dataset.
106
+ * One value in the dict is the DatasetItem, containing a reference to the
107
+ item that was annotated.
108
+ * The other value is a dictionary containing all the annotations for this
109
+ dataset item, sorted by annotation type.
110
+ """
111
+ return list (self .items_and_annotation_generator ())
112
+
113
def annotate(
    self,
    annotations: List[Annotation],
    update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
    batch_size: int = 5000,
    strict: bool = True,
):
    """Upload annotations for items within this slice.

    Args:
        annotations: List of annotations to upload.
        update: Forwarded unchanged to the client's ``annotate_dataset``
            call.
        batch_size: How many annotations to send per request.
        strict: Whether to first check that the annotations belong to
            this slice. Set to False to skip the check and speed up the
            upload.

    Returns:
        Whatever the client's ``annotate_dataset`` call returns, so the
        caller can inspect the outcome of the upload.

    Raises:
        ValueError: If ``strict`` is true and at least one annotation
            refers to an item id or reference id that is not in this
            slice. The message lists the offending ids.
    """
    if strict:
        (
            all_in_slice,
            missing_item_ids,
            missing_reference_ids,
        ) = check_annotations_are_in_slice(annotations, self)
        if not all_in_slice:
            message = "Not all annotations are in this slice.\n"
            if missing_item_ids:
                message += (
                    f"Item ids not found in slice: {missing_item_ids}\n"
                )
            if missing_reference_ids:
                message += (
                    "Reference ids not found in slice: "
                    f"{missing_reference_ids}"
                )
            raise ValueError(message)
    # Annotations are uploaded against the owning dataset; the slice is
    # only used for the membership check above.
    return self._client.annotate_dataset(
        dataset_id=self.dataset_id,
        annotations=annotations,
        update=update,
        batch_size=batch_size,
    )
147
+
148
+
149
def check_annotations_are_in_slice(
    annotations: "List[Annotation]", slice_to_check: "Slice"
) -> "Tuple[bool, Set[str], Set[str]]":
    """Check that every annotation targets an item contained in a slice.

    An annotation may identify its target by ``item_id``, by
    ``reference_id``, or both; identifiers that are ``None`` are ignored.

    Args:
        annotations: Annotations with ids referring to their targets.
        slice_to_check: The slice to check against. Its ``info()`` is
            called once to obtain the items it contains.

    Returns:
        A tuple of three elements:
        * True if all annotations are in the slice, False otherwise.
        * The set of item ids that were not found in the slice.
        * The set of reference ids that were not found in the slice.
    """
    dataset_items = slice_to_check.info()["dataset_items"]

    # Identifiers present in the slice, built once each.
    item_ids_in_slice = {item["id"] for item in dataset_items}
    reference_ids_in_slice = {item["ref_id"] for item in dataset_items}

    item_ids_not_found_in_slice = {
        annotation.item_id
        for annotation in annotations
        if annotation.item_id is not None
    } - item_ids_in_slice
    reference_ids_not_found_in_slice = {
        annotation.reference_id
        for annotation in annotations
        if annotation.reference_id is not None
    } - reference_ids_in_slice

    annotations_are_in_slice = not (
        item_ids_not_found_in_slice or reference_ids_not_found_in_slice
    )
    return (
        annotations_are_in_slice,
        item_ids_not_found_in_slice,
        reference_ids_not_found_in_slice,
    )
0 commit comments