[PLT-599] Remove deprecated class LabelList #1691

Merged: 1 commit merged on Jun 24, 2024
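This PR deletes the deprecated LabelList container from labelbox.data.annotation_types.collection, together with LabelGenerator.as_list, which returned one. LabelGenerator remains the supported collection type. The first hunk below removes LabelList from the package's import list; the second deletes the class itself from collection.py. A minimal migration sketch for code that previously materialized labels via as_list (the iter_labels helper is hypothetical):

```python
from labelbox.data.annotation_types import LabelGenerator

def iter_labels():
    # Hypothetical stand-in for however your code produces Label objects.
    yield from []

label_generator = LabelGenerator(data=iter_labels())

# Before (removed in this PR): labels = label_generator.as_list()
# After: a plain list covers LabelList's indexing, len(), and iteration.
labels = list(label_generator)
```

Since as_list was implemented as LabelList(data=list(self)), a plain list is a behavior-preserving replacement for read-only uses.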
@@ -47,7 +47,6 @@
from .data import LlmResponseCreationData

from .label import Label
from .collection import LabelList
from .collection import LabelGenerator

from .metrics import ScalarMetric
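With the import line above gone, `from labelbox.data.annotation_types import LabelList` raises ImportError on post-removal SDK versions. A hedged compatibility sketch, not part of this PR, for code that must run against both old and new releases:

```python
# Compatibility shim; an assumed pattern only, not part of this PR.
try:
    from labelbox.data.annotation_types import LabelList  # pre-removal SDKs
except ImportError:
    LabelList = None  # removed by PLT-599 (#1691); use plain lists instead
```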
170 changes: 0 additions & 170 deletions libs/labelbox/src/labelbox/data/annotation_types/collection.py
@@ -15,168 +15,6 @@
logger = logging.getLogger(__name__)


class LabelList:
    """
    A container for interacting with a collection of labels.
    Less memory efficient than LabelGenerator but more performant and convenient to use.
    Use on smaller datasets.
    """

    def __init__(self, data: Optional[Iterable[Label]] = None):
        warnings.warn("LabelList is deprecated and will be "
                      "removed in a future release.")

        if data is None:
            self._data = []
        elif isinstance(data, Label):
            self._data = [data]
        else:
            self._data = data
        self._index = 0

    def assign_feature_schema_ids(
            self, ontology_builder: "ontology.OntologyBuilder") -> "LabelList":
        """
        Adds schema ids to all FeatureSchema objects in the Labels.

        Args:
            ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
        Returns:
            LabelList. Useful for chaining these modifying functions.

        Note: You can now import annotations using names directly without having to look up schema_ids
        """
        warnings.warn("This method is deprecated and will be "
                      "removed in a future release. Feature schema ids"
                      " are no longer required for importing.")
        for label in self._data:
            label.assign_feature_schema_ids(ontology_builder)
        return self

    def add_to_dataset(self,
                       dataset: "Entity.Dataset",
                       signer: Callable[[bytes], str],
                       max_concurrency=20) -> "LabelList":
        """
        Creates data rows from each label's data object and attaches the data to the given dataset.
        Updates the label's data object to have the same external_id and uid as the data row.
        It is recommended to create a new dataset if memory is a concern, because all of the dataset's data rows are exported to make this faster.
        Also note that this relies on exported data that is cached,
        so it will not work on the same dataset more than once every 30 minutes.
        The workaround is to create a new dataset each time this function is used.

        Args:
            dataset: labelbox dataset object to add the new data rows to
            signer: A function that accepts bytes and returns a signed url.
        Returns:
            LabelList with updated references to the new data rows
        """
        self._ensure_unique_external_ids()
        self.add_url_to_data(signer, max_concurrency=max_concurrency)
        upload_task = dataset.create_data_rows([{
            'row_data': label.data.url,
            'external_id': label.data.external_id
        } for label in self._data])
        upload_task.wait_till_done()

        data_row_lookup = {
            data_row.external_id: data_row.uid
            for data_row in dataset.export_data_rows()
        }
        for label in self._data:
            label.data.uid = data_row_lookup[label.data.external_id]
        return self

    def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelList":
        """
        Creates signed urls for all masks in the LabelList.
        Multiple mask objects can reference the same MaskData, so this makes sure we only upload that url once.
        Only uploads a url if one doesn't already exist.

        Args:
            signer: A function that accepts bytes and returns a signed url.
            max_concurrency: how many threads to use for uploading.
                Should be balanced to match the signing service's capabilities.
        Returns:
            LabelList with updated references to the new mask urls
        """
        for row in self._apply_threaded(
                [label.add_url_to_masks for label in self._data], max_concurrency,
                signer):
            ...
        return self

    def add_url_to_data(self, signer, max_concurrency=20) -> "LabelList":
        """
        Creates signed urls for the data.
        Only uploads a url if one doesn't already exist.

        Args:
            signer: A function that accepts bytes and returns a signed url.
            max_concurrency: how many threads to use for uploading.
                Should be balanced to match the signing service's capabilities.
        Returns:
            LabelList with updated references to the new data urls
        """
        for row in self._apply_threaded(
                [label.add_url_to_data for label in self._data], max_concurrency,
                signer):
            ...
        return self

    def get_ontology(self) -> ontology.OntologyBuilder:
        classifications = []
        tools = []
        for label in self._data:
            tools = get_tools(label.object_annotations(), tools)
            classifications = get_classifications(
                label.classification_annotations(), classifications)
        return ontology.OntologyBuilder(tools=tools,
                                        classifications=classifications)

    def _ensure_unique_external_ids(self) -> None:
        external_ids = set()
        for label in self._data:
            if label.data.external_id is None:
                label.data.external_id = str(uuid4())
            else:
                if label.data.external_id in external_ids:
                    raise ValueError(
                        f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
                    )
            external_ids.add(label.data.external_id)

    def append(self, label: Label) -> None:
        self._data.append(label)

    def __iter__(self) -> "LabelList":
        self._index = 0
        return self

    def __next__(self) -> Label:
        if self._index == len(self._data):
            self._index = 0
            raise StopIteration

        value = self._data[self._index]
        self._index += 1
        return value

    def __len__(self) -> int:
        return len(self._data)

    def __getitem__(self, idx: int) -> Label:
        return self._data[idx]

    def _apply_threaded(self, fns, max_concurrency, *args):
        futures = []
        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
            for fn in fns:
                futures.append(executor.submit(fn, *args))
            for future in tqdm(as_completed(futures)):
                yield future.result()


class LabelGenerator(PrefetchGenerator):
    """
    A container for interacting with a large collection of labels.
@@ -187,12 +25,6 @@ def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
        self._fns = {}
        super().__init__(data, *args, **kwargs)

    def as_list(self) -> "LabelList":
        warnings.warn("This method is deprecated and will be "
                      "removed in a future release. LabelList"
                      " class will be deprecated.")
        return LabelList(data=list(self))

    def assign_feature_schema_ids(
            self,
            ontology_builder: "ontology.OntologyBuilder") -> "LabelGenerator":
@@ -232,8 +64,6 @@ def add_to_dataset(self, dataset: "Entity.Dataset",
        Creates data rows from each label's data object and attaches the data to the given dataset.
        Updates the label's data object to have the same external_id and uid as the data row.

        This is a lot slower than LabelList.add_to_dataset but also more memory efficient.

        Args:
            dataset: labelbox dataset object to add the new data rows to
            signer: A function that accepts bytes and returns a signed url.
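For completeness, the surviving upload path is LabelGenerator.add_to_dataset, whose signature appears in the last hunk above. A minimal usage sketch; the client setup, dataset name, and signer implementation are assumptions for illustration, not part of this PR:

```python
from labelbox import Client
from labelbox.data.annotation_types import LabelGenerator

client = Client(api_key="<YOUR_API_KEY>")             # assumed setup
dataset = client.create_dataset(name="plt-599-demo")  # assumed name

def signer(content: bytes) -> str:
    # Must upload `content` and return a signed url; the implementation
    # depends on your storage backend (assumption, not from this PR).
    raise NotImplementedError

labels = LabelGenerator(data=iter([]))  # substitute your Label generator
labels.add_to_dataset(dataset, signer)  # replaces LabelList.add_to_dataset
```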