5
5
6
6
from tqdm import tqdm
7
7
8
- from labelbox .schema . ontology import OntologyBuilder
8
+ from labelbox .schema import ontology
9
9
from labelbox .orm .model import Entity
10
+ from ..ontology import get_classifications , get_tools
10
11
from ..generator import PrefetchGenerator
11
12
from .label import Label
12
13
13
14
logger = logging .getLogger (__name__ )
14
15
15
16
16
- class LabelCollection :
17
+ class LabelList :
17
18
"""
18
19
A container for interacting with a collection of labels.
19
20
Less memory efficient than LabelGenerator but more performant and convenient to use.
@@ -25,15 +26,15 @@ def __init__(self, data: Iterable[Label]):
25
26
self ._index = 0
26
27
27
28
def assign_schema_ids (
28
- self , ontology_builder : OntologyBuilder ) -> "LabelCollection " :
29
+ self , ontology_builder : "ontology. OntologyBuilder" ) -> "LabelList " :
29
30
"""
30
31
Adds schema ids to all FeatureSchema objects in the Labels.
31
32
This is necessary for MAL.
32
33
33
34
Args:
34
- ontology_builder: The ontology that matches the feature names assigned to objects in this LabelCollection
35
+ ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
35
36
Returns:
36
- LabelCollection . useful for chaining these modifying functions
37
+ LabelList . useful for chaining these modifying functions
37
38
"""
38
39
for label in self ._data :
39
40
label .assign_schema_ids (ontology_builder )
@@ -42,7 +43,7 @@ def assign_schema_ids(
42
43
def add_to_dataset (self ,
43
44
dataset : "Entity.Dataset" ,
44
45
signer : Callable [[bytes ], str ],
45
- max_concurrency = 20 ) -> "LabelCollection " :
46
+ max_concurrency = 20 ) -> "LabelList " :
46
47
"""
47
48
Creates data rows from each labels data object and attaches the data to the given dataset.
48
49
Updates the label's data object to have the same external_id and uid as the data row.
@@ -55,15 +56,15 @@ def add_to_dataset(self,
55
56
dataset: labelbox dataset object to add the new data row to
56
57
signer: A function that accepts bytes and returns a signed url.
57
58
Returns:
58
- LabelCollection with updated references to new data rows
59
+ LabelList with updated references to new data rows
59
60
"""
60
61
self ._ensure_unique_external_ids ()
61
62
self .add_url_to_data (signer , max_concurrency = max_concurrency )
62
63
upload_task = dataset .create_data_rows ([{
63
- Entity . DataRow . row_data : label .data .url ,
64
- Entity . DataRow . external_id : label .data .external_id
64
+ ' row_data' : label .data .url ,
65
+ ' external_id' : label .data .external_id
65
66
} for label in self ._data ])
66
- upload_task .wait_til_done ()
67
+ upload_task .wait_till_done ()
67
68
68
69
data_row_lookup = {
69
70
data_row .external_id : data_row .uid
@@ -73,9 +74,9 @@ def add_to_dataset(self,
73
74
label .data .uid = data_row_lookup [label .data .external_id ]
74
75
return self
75
76
76
- def add_url_to_masks (self , signer , max_concurrency = 20 ) -> "LabelCollection " :
77
+ def add_url_to_masks (self , signer , max_concurrency = 20 ) -> "LabelList " :
77
78
"""
78
- Creates signed urls for all masks in the LabelCollection .
79
+ Creates signed urls for all masks in the LabelList .
79
80
Multiple masks can reference the same RasterData mask so this makes sure we only upload that url once.
80
81
Only uploads url if one doesn't already exist.
81
82
@@ -84,15 +85,15 @@ def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelCollection":
84
85
max_concurrency: how many threads to use for uploading.
85
86
Should be balanced to match the signing services capabilities.
86
87
Returns:
87
- LabelCollection with updated references to the new mask urls
88
+ LabelList with updated references to the new mask urls
88
89
"""
89
90
for row in self ._apply_threaded (
90
91
[label .add_url_to_masks for label in self ._data ], max_concurrency ,
91
92
signer ):
92
93
...
93
94
return self
94
95
95
- def add_url_to_data (self , signer , max_concurrency = 20 ) -> "LabelCollection " :
96
+ def add_url_to_data (self , signer , max_concurrency = 20 ) -> "LabelList " :
96
97
"""
97
98
Creates signed urls for the data
98
99
Only uploads url if one doesn't already exist.
@@ -102,32 +103,46 @@ def add_url_to_data(self, signer, max_concurrency=20) -> "LabelCollection":
102
103
max_concurrency: how many threads to use for uploading.
103
104
Should be balanced to match the signing services capabilities.
104
105
Returns:
105
- LabelCollection with updated references to the new data urls
106
+ LabelList with updated references to the new data urls
106
107
"""
107
108
for row in self ._apply_threaded (
108
109
[label .add_url_to_data for label in self ._data ], max_concurrency ,
109
110
signer ):
110
111
...
111
112
return self
112
113
114
def get_ontology(self) -> "ontology.OntologyBuilder":
    """
    Builds an ontology from the annotations attached to every label in this list.

    Each label's object annotations are passed to `get_tools` and its
    classification annotations to `get_classifications`, together with the
    results accumulated so far.
    NOTE(review): presumably the helpers merge new feature definitions into
    the running lists and return the merged result - confirm against
    `..ontology`.

    Returns:
        OntologyBuilder constructed from the accumulated tools and classifications
    """
    tools = []
    classifications = []
    for label in self._data:
        # Each helper receives the running result and hands back the updated one.
        tools = get_tools(label.object_annotations(), tools)
        classifications = get_classifications(
            label.classification_annotations(), classifications)
    return ontology.OntologyBuilder(tools=tools,
                                    classifications=classifications)
123
+
113
124
def _ensure_unique_external_ids(self) -> None:
    """
    Guarantees that every label's data object has a unique external id.

    Labels without an external id are given a freshly generated uuid4 string.
    Labels that already carry one are checked against the ids seen so far.

    Raises:
        ValueError: if the same external id appears on more than one label
    """
    external_ids = set()
    for label in self._data:
        if label.data.external_id is None:
            # Stored as a string rather than a UUID object.
            label.data.external_id = str(uuid4())
        elif label.data.external_id in external_ids:
            raise ValueError(
                f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
            )
        # Record every id, generated or user supplied, so later labels are checked against it.
        external_ids.add(label.data.external_id)
124
135
125
- def __iter__ (self ) -> "LabelCollection" :
136
def append(self, label: "Label") -> None:
    """
    Adds a label to the end of this list.

    Args:
        label: the label to append to the underlying data
    """
    self._data.append(label)
138
+
139
def __iter__(self) -> "LabelList":
    """
    Resets the iteration cursor to the first label and returns this object as its own iterator.
    """
    self._index = 0
    return self
128
142
129
143
def __next__ (self ) -> Label :
130
144
if self ._index == len (self ._data ):
145
+ self ._index = 0
131
146
raise StopIteration
132
147
133
148
value = self ._data [self ._index ]
@@ -154,18 +169,19 @@ class LabelGenerator(PrefetchGenerator):
154
169
A container for interacting with a collection of labels.
155
170
156
171
Use this class if you have larger data. It is slightly harder to work with
157
- than the LabelCollection but will be much more memory efficient.
172
+ than the LabelList but will be much more memory efficient.
158
173
"""
159
174
160
175
def __init__(self, data: Generator["Label", None, None], *args, **kwargs):
    """
    Args:
        data: generator that yields labels
        *args, **kwargs: forwarded unchanged to the parent class constructor
    """
    # Named functions registered on this generator, keyed by name.
    # NOTE(review): created before the parent constructor runs, presumably
    # because the parent may start consuming data immediately - confirm.
    self._fns = {}
    super().__init__(data, *args, **kwargs)
163
178
164
- def as_collection (self ) -> "LabelCollection " :
165
- return LabelCollection (data = list (self ))
179
def as_list(self) -> "LabelList":
    """
    Iterates this generator to exhaustion and collects the labels into a LabelList.

    Returns:
        LabelList holding every label produced by this generator
    """
    return LabelList(data=list(self))
166
181
167
182
def assign_schema_ids (
168
- self , ontology_builder : OntologyBuilder ) -> "LabelGenerator" :
183
+ self ,
184
+ ontology_builder : "ontology.OntologyBuilder" ) -> "LabelGenerator" :
169
185
170
186
def _assign_ids (label : Label ):
171
187
label .assign_schema_ids (ontology_builder )
@@ -190,7 +206,7 @@ def _add_url_to_data(label: Label):
190
206
label .add_url_to_data (signer )
191
207
return label
192
208
193
- self ._fns ['_add_url_to_data ' ] = _add_url_to_data
209
+ self ._fns ['add_url_to_data ' ] = _add_url_to_data
194
210
return self
195
211
196
212
def add_to_dataset (self , dataset : "Entity.Dataset" ,
@@ -199,7 +215,7 @@ def add_to_dataset(self, dataset: "Entity.Dataset",
199
215
Creates data rows from each labels data object and attaches the data to the given dataset.
200
216
Updates the label's data object to have the same external_id and uid as the data row.
201
217
202
- This is a lot slower than LabelCollection .add_to_dataset but also more memory efficient.
218
+ This is a lot slower than LabelList .add_to_dataset but also more memory efficient.
203
219
204
220
Args:
205
221
dataset: labelbox dataset object to add the new data row to
@@ -237,6 +253,20 @@ def _add_url_to_masks(label: Label):
237
253
self ._fns ['add_url_to_masks' ] = _add_url_to_masks
238
254
return self
239
255
256
def register_background_fn(self, fn: Callable[["Label"], "Label"],
                           name: str) -> "LabelGenerator":
    """
    Allows users to add arbitrary io functions to the generator.
    These functions will be executed in parallel and added to a prefetch queue.

    Args:
        fn: Callable that modifies a label and then returns the same label
            - For performance reasons, this function shouldn't run if the object already has the desired state.
        name: Register the name of the function. If the name already exists, then the function will be replaced.
    Returns:
        LabelGenerator. useful for chaining these modifying functions
    """
    self._fns[name] = fn
    return self
269
+
240
270
def __iter__(self):
    """
    Returns this generator itself; it acts as its own iterator.
    """
    return self
242
272
@@ -255,4 +285,4 @@ def __next__(self):
255
285
return self ._process (value )
256
286
257
287
258
- LabelData = Union [LabelCollection , LabelGenerator ]
288
# Type alias covering both label containers: the in-memory LabelList and the streaming LabelGenerator.
LabelCollection = Union[LabelList, LabelGenerator]
0 commit comments