logger = logging.getLogger(__name__)


-class LabelList:
-    """
-    A container for interacting with a collection of labels.
-    Less memory efficient than LabelGenerator but more performant and convenient to use.
-    Use on smaller datasets.
-    """
-
-    def __init__(self, data: Optional[Iterable[Label]] = None):
-        warnings.warn("LabelList is deprecated and will be "
-                      "removed in a future release.")
-
-        if data is None:
-            self._data = []
-        elif isinstance(data, Label):
-            self._data = [data]
-        else:
-            self._data = data
-        self._index = 0
-
-    def assign_feature_schema_ids(
-            self, ontology_builder: "ontology.OntologyBuilder") -> "LabelList":
-        """
-        Adds schema ids to all FeatureSchema objects in the Labels.
-
-        Args:
-            ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
-        Returns:
-            LabelList, useful for chaining these modifying functions
-
-        Note: You can now import annotations using names directly, without having to look up schema_ids
-        """
-        warnings.warn("This method is deprecated and will be "
-                      "removed in a future release. Feature schema ids"
-                      " are no longer required for importing.")
-        for label in self._data:
-            label.assign_feature_schema_ids(ontology_builder)
-        return self
-
-    def add_to_dataset(self,
-                       dataset: "Entity.Dataset",
-                       signer: Callable[[bytes], str],
-                       max_concurrency=20) -> "LabelList":
-        """
-        Creates data rows from each label's data object and attaches the data to the given dataset.
-        Updates the label's data object to have the same external_id and uid as the data row.
-        It is recommended to create a new dataset if memory is a concern, because all of the dataset's data rows are exported to make this faster.
-        Also note that this relies on exported data that is cached,
-        so this will not work on the same dataset more frequently than once every 30 minutes.
-        The workaround is to create a new dataset each time this function is used.
-
-        Args:
-            dataset: labelbox dataset object to add the new data row to
-            signer: A function that accepts bytes and returns a signed url.
-        Returns:
-            LabelList with updated references to new data rows
-        """
-        self._ensure_unique_external_ids()
-        self.add_url_to_data(signer, max_concurrency=max_concurrency)
-        upload_task = dataset.create_data_rows([{
-            'row_data': label.data.url,
-            'external_id': label.data.external_id
-        } for label in self._data])
-        upload_task.wait_till_done()
-
-        data_row_lookup = {
-            data_row.external_id: data_row.uid
-            for data_row in dataset.export_data_rows()
-        }
-        for label in self._data:
-            label.data.uid = data_row_lookup[label.data.external_id]
-        return self
-
-    def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelList":
-        """
-        Creates signed urls for all masks in the LabelList.
-        Multiple mask objects can reference the same MaskData, so this makes sure we only upload that url once.
-        Only uploads a url if one doesn't already exist.
-
-        Args:
-            signer: A function that accepts bytes and returns a signed url.
-            max_concurrency: how many threads to use for uploading.
-                Should be balanced to match the signing service's capabilities.
-        Returns:
-            LabelList with updated references to the new mask urls
-        """
-        for row in self._apply_threaded(
-            [label.add_url_to_masks for label in self._data], max_concurrency,
-                signer):
-            ...
-        return self
-
-    def add_url_to_data(self, signer, max_concurrency=20) -> "LabelList":
-        """
-        Creates signed urls for the data.
-        Only uploads a url if one doesn't already exist.
-
-        Args:
-            signer: A function that accepts bytes and returns a signed url.
-            max_concurrency: how many threads to use for uploading.
-                Should be balanced to match the signing service's capabilities.
-        Returns:
-            LabelList with updated references to the new data urls
-        """
-        for row in self._apply_threaded(
-            [label.add_url_to_data for label in self._data], max_concurrency,
-                signer):
-            ...
-        return self
-
-    def get_ontology(self) -> ontology.OntologyBuilder:
-        classifications = []
-        tools = []
-        for label in self._data:
-            tools = get_tools(label.object_annotations(), tools)
-            classifications = get_classifications(
-                label.classification_annotations(), classifications)
-        return ontology.OntologyBuilder(tools=tools,
-                                        classifications=classifications)
-
-    def _ensure_unique_external_ids(self) -> None:
-        external_ids = set()
-        for label in self._data:
-            if label.data.external_id is None:
-                label.data.external_id = str(uuid4())
-            else:
-                if label.data.external_id in external_ids:
-                    raise ValueError(
-                        f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
-                    )
-            external_ids.add(label.data.external_id)
-
-    def append(self, label: Label) -> None:
-        self._data.append(label)
-
-    def __iter__(self) -> "LabelList":
-        self._index = 0
-        return self
-
-    def __next__(self) -> Label:
-        if self._index == len(self._data):
-            self._index = 0
-            raise StopIteration
-
-        value = self._data[self._index]
-        self._index += 1
-        return value
-
-    def __len__(self) -> int:
-        return len(self._data)
-
-    def __getitem__(self, idx: int) -> Label:
-        return self._data[idx]
-
-    def _apply_threaded(self, fns, max_concurrency, *args):
-        futures = []
-        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
-            for fn in fns:
-                futures.append(executor.submit(fn, *args))
-            for future in tqdm(as_completed(futures)):
-                yield future.result()
-
-
class LabelGenerator(PrefetchGenerator):
    """
    A container for interacting with a large collection of labels.
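Reviewer note: `_apply_threaded` is removed along with the class above. For downstream code that relied on the same fan-out behavior, here is a minimal, self-contained sketch of the submit-then-drain pattern it implemented; `apply_threaded`, `fake_signer`, and the toy callables are illustrative stand-ins, not SDK APIs (the deleted version also wrapped the drain loop in `tqdm` for progress reporting).

```python
# Sketch of the pattern the deleted _apply_threaded implemented:
# submit one callable per label, then drain results as they complete.
from concurrent.futures import ThreadPoolExecutor, as_completed

def apply_threaded(fns, max_concurrency, *args):
    with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
        futures = [executor.submit(fn, *args) for fn in fns]
        for future in as_completed(futures):
            yield future.result()  # re-raises any worker-thread exception

# Toy stand-ins for [label.add_url_to_masks for label in self._data]:
fake_signer = lambda payload: f"https://signed.example/{payload}"
fns = [lambda signer, i=i: signer(f"mask-{i}") for i in range(5)]
for url in apply_threaded(fns, 2, fake_signer):
    print(url)  # yields in completion order, not submission order
```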
@@ -187,12 +25,6 @@ def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
        self._fns = {}
        super().__init__(data, *args, **kwargs)

-    def as_list(self) -> "LabelList":
-        warnings.warn("This method is deprecated and will be "
-                      "removed in a future release. LabelList"
-                      " class will be deprecated.")
-        return LabelList(data=list(self))
-
    def assign_feature_schema_ids(
            self,
            ontology_builder: "ontology.OntologyBuilder") -> "LabelGenerator":
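Reviewer note: with `as_list` gone, call sites can materialize a `LabelGenerator` with plain `list(...)`. A runnable sketch of the call-site change, using a toy iterator in place of a real generator of `Label` objects:

```python
# Toy stand-in for a LabelGenerator; any single-pass iterable behaves the same.
def label_stream():
    yield from ("label-a", "label-b", "label-c")

# Before (removed in this PR): labels = label_generator.as_list()
# After: materialize explicitly, keeping the memory trade-off at the call site.
labels = list(label_stream())
assert len(labels) == 3        # len() and indexing need the materialized list
assert labels[0] == "label-a"  # random access works only after materializing
```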
@@ -232,8 +64,6 @@ def add_to_dataset(self, dataset: "Entity.Dataset",
        Creates data rows from each label's data object and attaches the data to the given dataset.
        Updates the label's data object to have the same external_id and uid as the data row.

-        This is a lot slower than LabelList.add_to_dataset, but also more memory efficient.
-
        Args:
            dataset: labelbox dataset object to add the new data row to
            signer: A function that accepts bytes and returns a signed url.
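Reviewer note: both the removed `LabelList.add_to_dataset` and the surviving `LabelGenerator.add_to_dataset` share the `signer: Callable[[bytes], str]` contract documented above. For local experiments, a self-contained toy signer is sketched below; `make_local_signer` and the `file://` scheme are illustrative only, since a production signer would upload the bytes and return a signed https URL.

```python
# Toy signer satisfying Callable[[bytes], str]: writes the bytes locally
# and returns a file:// URL. Illustrative only -- real signers upload to
# object storage and return a signed https URL.
import uuid
from pathlib import Path

def make_local_signer(root: str = "/tmp/signed-uploads"):
    Path(root).mkdir(parents=True, exist_ok=True)

    def signer(raw_bytes: bytes) -> str:
        path = Path(root) / str(uuid.uuid4())
        path.write_bytes(raw_bytes)
        return path.as_uri()

    return signer

signer = make_local_signer()
print(signer(b"example payload"))  # e.g. file:///tmp/signed-uploads/<uuid>
```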