logger = logging.getLogger(__name__)


-class LabelList:
-    """
-    A container for interacting with a collection of labels.
-    Less memory efficient than LabelGenerator but more performant and convenient to use.
-    Use on smaller datasets.
-    """
-
-    def __init__(self, data: Optional[Iterable[Label]] = None):
-        warnings.warn("LabelList is deprecated and will be "
-                      "removed in a future release.")
-
-        if data is None:
-            self._data = []
-        elif isinstance(data, Label):
-            self._data = [data]
-        else:
-            self._data = data
-        self._index = 0
-
-    def assign_feature_schema_ids(
-            self, ontology_builder: "ontology.OntologyBuilder") -> "LabelList":
-        """
-        Adds schema ids to all FeatureSchema objects in the Labels.
-
-        Args:
-            ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
-        Returns:
-            LabelList, useful for chaining these modifying functions
-
-        Note: You can now import annotations using names directly, without having to look up schema_ids
-        """
-        warnings.warn("This method is deprecated and will be "
-                      "removed in a future release. Feature schema ids"
-                      " are no longer required for importing.")
-        for label in self._data:
-            label.assign_feature_schema_ids(ontology_builder)
-        return self
-
-    def add_to_dataset(self,
-                       dataset: "Entity.Dataset",
-                       signer: Callable[[bytes], str],
-                       max_concurrency=20) -> "LabelList":
-        """
-        Creates data rows from each label's data object and attaches the data to the given dataset.
-        Updates the label's data object to have the same external_id and uid as the data row.
-        It is recommended to create a new dataset if memory is a concern, because all of the dataset's data rows are exported to make this faster.
-        Also note that this relies on exported data that is cached,
-        so this will not work on the same dataset more frequently than once every 30 minutes.
-        The workaround is to create a new dataset each time this function is used.
-
-        Args:
-            dataset: labelbox dataset object to add the new data row to
-            signer: A function that accepts bytes and returns a signed url.
-        Returns:
-            LabelList with updated references to new data rows
-        """
-        self._ensure_unique_external_ids()
-        self.add_url_to_data(signer, max_concurrency=max_concurrency)
-        upload_task = dataset.create_data_rows([{
-            'row_data': label.data.url,
-            'external_id': label.data.external_id
-        } for label in self._data])
-        upload_task.wait_till_done()
-
-        data_row_lookup = {
-            data_row.external_id: data_row.uid
-            for data_row in dataset.export_data_rows()
-        }
-        for label in self._data:
-            label.data.uid = data_row_lookup[label.data.external_id]
-        return self
-
-    def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelList":
-        """
-        Creates signed urls for all masks in the LabelList.
-        Multiple mask objects can reference the same MaskData, so this makes sure we only upload that url once.
-        Only uploads a url if one doesn't already exist.
-
-        Args:
-            signer: A function that accepts bytes and returns a signed url.
-            max_concurrency: how many threads to use for uploading.
-                Should be balanced to match the signing service's capabilities.
-        Returns:
-            LabelList with updated references to the new mask urls
-        """
-        for row in self._apply_threaded(
-            [label.add_url_to_masks for label in self._data], max_concurrency,
-                signer):
-            ...
-        return self
-
-    def add_url_to_data(self, signer, max_concurrency=20) -> "LabelList":
-        """
-        Creates signed urls for the data.
-        Only uploads a url if one doesn't already exist.
-
-        Args:
-            signer: A function that accepts bytes and returns a signed url.
-            max_concurrency: how many threads to use for uploading.
-                Should be balanced to match the signing service's capabilities.
-        Returns:
-            LabelList with updated references to the new data urls
-        """
-        for row in self._apply_threaded(
-            [label.add_url_to_data for label in self._data], max_concurrency,
-                signer):
-            ...
-        return self
-
-    def get_ontology(self) -> ontology.OntologyBuilder:
-        classifications = []
-        tools = []
-        for label in self._data:
-            tools = get_tools(label.object_annotations(), tools)
-            classifications = get_classifications(
-                label.classification_annotations(), classifications)
-        return ontology.OntologyBuilder(tools=tools,
-                                        classifications=classifications)
-
-    def _ensure_unique_external_ids(self) -> None:
-        external_ids = set()
-        for label in self._data:
-            if label.data.external_id is None:
-                label.data.external_id = str(uuid4())
-            else:
-                if label.data.external_id in external_ids:
-                    raise ValueError(
-                        f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
-                    )
-            external_ids.add(label.data.external_id)
-
-    def append(self, label: Label) -> None:
-        self._data.append(label)
-
-    def __iter__(self) -> "LabelList":
-        self._index = 0
-        return self
-
-    def __next__(self) -> Label:
-        if self._index == len(self._data):
-            self._index = 0
-            raise StopIteration
-
-        value = self._data[self._index]
-        self._index += 1
-        return value
-
-    def __len__(self) -> int:
-        return len(self._data)
-
-    def __getitem__(self, idx: int) -> Label:
-        return self._data[idx]
-
-    def _apply_threaded(self, fns, max_concurrency, *args):
-        futures = []
-        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
-            for fn in fns:
-                futures.append(executor.submit(fn, *args))
-            for future in tqdm(as_completed(futures)):
-                yield future.result()
-
-
class LabelGenerator(PrefetchGenerator):
    """
    A container for interacting with a large collection of labels.
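Reviewer note: `_apply_threaded` is removed along with the class above. For downstream code that relied on the same fan-out behavior, here is a minimal, self-contained sketch of the submit-then-drain pattern it implemented; `apply_threaded`, `fake_signer`, and the toy callables are illustrative stand-ins, not SDK APIs (the deleted version also wrapped the drain loop in `tqdm` for progress reporting).

```python
# Sketch of the pattern the deleted _apply_threaded implemented:
# submit one callable per label, then drain results as they complete.
from concurrent.futures import ThreadPoolExecutor, as_completed

def apply_threaded(fns, max_concurrency, *args):
    with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
        futures = [executor.submit(fn, *args) for fn in fns]
        for future in as_completed(futures):
            yield future.result()  # re-raises any worker-thread exception

# Toy stand-ins for [label.add_url_to_masks for label in self._data]:
fake_signer = lambda payload: f"https://signed.example/{payload}"
fns = [lambda signer, i=i: signer(f"mask-{i}") for i in range(5)]
for url in apply_threaded(fns, 2, fake_signer):
    print(url)  # yields in completion order, not submission order
```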
@@ -187,12 +25,6 @@ def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
        self._fns = {}
        super().__init__(data, *args, **kwargs)

-    def as_list(self) -> "LabelList":
-        warnings.warn("This method is deprecated and will be "
-                      "removed in a future release. LabelList"
-                      " class will be deprecated.")
-        return LabelList(data=list(self))
-
    def assign_feature_schema_ids(
            self,
            ontology_builder: "ontology.OntologyBuilder") -> "LabelGenerator":
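Reviewer note: with `as_list` gone, call sites can materialize a `LabelGenerator` with plain `list(...)`. A runnable sketch of the call-site change, using a toy iterator in place of a real generator of `Label` objects:

```python
# Toy stand-in for a LabelGenerator; any single-pass iterable behaves the same.
def label_stream():
    yield from ("label-a", "label-b", "label-c")

# Before (removed in this PR): labels = label_generator.as_list()
# After: materialize explicitly, keeping the memory trade-off at the call site.
labels = list(label_stream())
assert len(labels) == 3        # len() and indexing need the materialized list
assert labels[0] == "label-a"  # random access works only after materializing
```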
@@ -232,8 +64,6 @@ def add_to_dataset(self, dataset: "Entity.Dataset",
        Creates data rows from each label's data object and attaches the data to the given dataset.
        Updates the label's data object to have the same external_id and uid as the data row.

-        This is a lot slower than LabelList.add_to_dataset, but also more memory efficient.
-
        Args:
            dataset: labelbox dataset object to add the new data row to
            signer: A function that accepts bytes and returns a signed url.
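Reviewer note: both the removed `LabelList.add_to_dataset` and the surviving `LabelGenerator.add_to_dataset` share the `signer: Callable[[bytes], str]` contract documented above. For local experiments, a self-contained toy signer is sketched below; `make_local_signer` and the `file://` scheme are illustrative only, since a production signer would upload the bytes and return a signed https URL.

```python
# Toy signer satisfying Callable[[bytes], str]: writes the bytes locally
# and returns a file:// URL. Illustrative only -- real signers upload to
# object storage and return a signed https URL.
import uuid
from pathlib import Path

def make_local_signer(root: str = "/tmp/signed-uploads"):
    Path(root).mkdir(parents=True, exist_ok=True)

    def signer(raw_bytes: bytes) -> str:
        path = Path(root) / str(uuid.uuid4())
        path.write_bytes(raw_bytes)
        return path.as_uri()

    return signer

signer = make_local_signer()
print(signer(b"example payload"))  # e.g. file:///tmp/signed-uploads/<uuid>
```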