Skip to content

Commit d254caf

Browse files
authored
[PLT-599] Remove deprecated class LabelList (#1691)
2 parents 3680d38 + 92b35aa commit d254caf

File tree

2 files changed

+0
-171
lines changed

2 files changed

+0
-171
lines changed

libs/labelbox/src/labelbox/data/annotation_types/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747
from .data import LlmResponseCreationData
4848

4949
from .label import Label
50-
from .collection import LabelList
5150
from .collection import LabelGenerator
5251

5352
from .metrics import ScalarMetric

libs/labelbox/src/labelbox/data/annotation_types/collection.py

Lines changed: 0 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -15,168 +15,6 @@
1515
logger = logging.getLogger(__name__)
1616

1717

18-
class LabelList:
19-
"""
20-
A container for interacting with a collection of labels.
21-
Less memory efficient than LabelGenerator but more performant and convenient to use.
22-
Use on smaller datasets.
23-
"""
24-
25-
def __init__(self, data: Optional[Iterable[Label]] = None):
26-
warnings.warn("LabelList is deprecated and will be "
27-
"removed in a future release.")
28-
29-
if data is None:
30-
self._data = []
31-
elif isinstance(data, Label):
32-
self._data = [data]
33-
else:
34-
self._data = data
35-
self._index = 0
36-
37-
def assign_feature_schema_ids(
38-
self, ontology_builder: "ontology.OntologyBuilder") -> "LabelList":
39-
"""
40-
Adds schema ids to all FeatureSchema objects in the Labels.
41-
42-
Args:
43-
ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
44-
Returns:
45-
LabelList, useful for chaining these modifying functions
46-
47-
Note: You can now import annotations using names directly without having to lookup schema_ids
48-
"""
49-
warnings.warn("This method is deprecated and will be "
50-
"removed in a future release. Feature schema ids"
51-
" are no longer required for importing.")
52-
for label in self._data:
53-
label.assign_feature_schema_ids(ontology_builder)
54-
return self
55-
56-
def add_to_dataset(self,
57-
dataset: "Entity.Dataset",
58-
signer: Callable[[bytes], str],
59-
max_concurrency=20) -> "LabelList":
60-
"""
61-
Creates data rows from each labels data object and attaches the data to the given dataset.
62-
Updates the label's data object to have the same external_id and uid as the data row.
63-
It is recommended to create a new dataset if memory is a concern because all dataset data rows are exported to make this faster.
64-
Also note that this relies on exported data that is cached.
65-
So this will not work on the same dataset more frequently than every 30 min.
66-
The workaround is creating a new dataset each time this function is used.
67-
68-
Args:
69-
dataset: labelbox dataset object to add the new data row to
70-
signer: A function that accepts bytes and returns a signed url.
71-
Returns:
72-
LabelList with updated references to new data rows
73-
"""
74-
self._ensure_unique_external_ids()
75-
self.add_url_to_data(signer, max_concurrency=max_concurrency)
76-
upload_task = dataset.create_data_rows([{
77-
'row_data': label.data.url,
78-
'external_id': label.data.external_id
79-
} for label in self._data])
80-
upload_task.wait_till_done()
81-
82-
data_row_lookup = {
83-
data_row.external_id: data_row.uid
84-
for data_row in dataset.export_data_rows()
85-
}
86-
for label in self._data:
87-
label.data.uid = data_row_lookup[label.data.external_id]
88-
return self
89-
90-
def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelList":
91-
"""
92-
Creates signed urls for all masks in the LabelList.
93-
Multiple masks objects can reference the same MaskData so this makes sure we only upload that url once.
94-
Only uploads url if one doesn't already exist.
95-
96-
Args:
97-
signer: A function that accepts bytes and returns a signed url.
98-
max_concurrency: how many threads to use for uploading.
99-
Should be balanced to match the signing service's capabilities.
100-
Returns:
101-
LabelList with updated references to the new mask urls
102-
"""
103-
for row in self._apply_threaded(
104-
[label.add_url_to_masks for label in self._data], max_concurrency,
105-
signer):
106-
...
107-
return self
108-
109-
def add_url_to_data(self, signer, max_concurrency=20) -> "LabelList":
110-
"""
111-
Creates signed urls for the data
112-
Only uploads url if one doesn't already exist.
113-
114-
Args:
115-
signer: A function that accepts bytes and returns a signed url.
116-
max_concurrency: how many threads to use for uploading.
117-
Should be balanced to match the signing service's capabilities.
118-
Returns:
119-
LabelList with updated references to the new data urls
120-
"""
121-
for row in self._apply_threaded(
122-
[label.add_url_to_data for label in self._data], max_concurrency,
123-
signer):
124-
...
125-
return self
126-
127-
def get_ontology(self) -> ontology.OntologyBuilder:
128-
classifications = []
129-
tools = []
130-
for label in self._data:
131-
tools = get_tools(label.object_annotations(), tools)
132-
classifications = get_classifications(
133-
label.classification_annotations(), classifications)
134-
return ontology.OntologyBuilder(tools=tools,
135-
classifications=classifications)
136-
137-
def _ensure_unique_external_ids(self) -> None:
138-
external_ids = set()
139-
for label in self._data:
140-
if label.data.external_id is None:
141-
label.data.external_id = str(uuid4())
142-
else:
143-
if label.data.external_id in external_ids:
144-
raise ValueError(
145-
f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
146-
)
147-
external_ids.add(label.data.external_id)
148-
149-
def append(self, label: Label) -> None:
150-
self._data.append(label)
151-
152-
def __iter__(self) -> "LabelList":
153-
self._index = 0
154-
return self
155-
156-
def __next__(self) -> Label:
157-
if self._index == len(self._data):
158-
self._index = 0
159-
raise StopIteration
160-
161-
value = self._data[self._index]
162-
self._index += 1
163-
return value
164-
165-
def __len__(self) -> int:
166-
return len(self._data)
167-
168-
def __getitem__(self, idx: int) -> Label:
169-
return self._data[idx]
170-
171-
def _apply_threaded(self, fns, max_concurrency, *args):
172-
futures = []
173-
with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
174-
for fn in fns:
175-
futures.append(executor.submit(fn, *args))
176-
for future in tqdm(as_completed(futures)):
177-
yield future.result()
178-
179-
18018
class LabelGenerator(PrefetchGenerator):
18119
"""
18220
A container for interacting with a large collection of labels.
@@ -187,12 +25,6 @@ def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
18725
self._fns = {}
18826
super().__init__(data, *args, **kwargs)
18927

190-
def as_list(self) -> "LabelList":
191-
warnings.warn("This method is deprecated and will be "
192-
"removed in a future release. LabelList"
193-
" class will be deprecated.")
194-
return LabelList(data=list(self))
195-
19628
def assign_feature_schema_ids(
19729
self,
19830
ontology_builder: "ontology.OntologyBuilder") -> "LabelGenerator":
@@ -232,8 +64,6 @@ def add_to_dataset(self, dataset: "Entity.Dataset",
23264
Creates data rows from each labels data object and attaches the data to the given dataset.
23365
Updates the label's data object to have the same external_id and uid as the data row.
23466
235-
This is a lot slower than LabelList.add_to_dataset but also more memory efficient.
236-
23767
Args:
23868
dataset: labelbox dataset object to add the new data row to
23969
signer: A function that accepts bytes and returns a signed url.

0 commit comments

Comments
 (0)