
Commit 2aa342b

Author: Val Brodsky
Commit message: PR feedback
1 parent 69efe35

4 files changed (+20, -35 lines)


libs/labelbox/src/labelbox/schema/dataset.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -33,7 +33,7 @@
 from labelbox.schema.user import User
 from labelbox.schema.iam_integration import IAMIntegration
 from labelbox.schema.internal.data_row_upsert_item import (DataRowUpsertItem)
-from labelbox.schema.internal.data_row_uploader import DataRowUploader
+import labelbox.schema.internal.data_row_uploader as data_row_uploader
 from labelbox.schema.internal.descriptor_file_creator import DescriptorFileCreator
 from labelbox.schema.internal.datarow_upload_constants import (
     FILE_UPLOAD_THREAD_COUNT, UPSERT_CHUNK_SIZE_BYTES)
@@ -262,7 +262,7 @@ def create_data_rows(self,
         Use this instead of `Dataset.create_data_rows_sync` uploads for batches that contain more than 1000 data rows.
 
         Args:
-            items (iterable of (dict or str)): See the docstring for `DataRowUploader.create_descriptor_file` for more information
+            items (iterable of (dict or str))
 
         Returns:
             Task representing the data import on the server side. The Task
@@ -617,7 +617,7 @@ def _exec_upsert_data_rows(
             file_upload_thread_count: int = FILE_UPLOAD_THREAD_COUNT
     ) -> "DataUpsertTask":
 
-        manifest = DataRowUploader.upload_in_chunks(
+        manifest = data_row_uploader.upload_in_chunks(
             client=self.client,
             specs=specs,
             file_upload_thread_count=file_upload_thread_count,
```
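
The dataset.py change above swaps a class-scoped entry point for a module-level one. A minimal, self-contained sketch of that refactor pattern, with toy names rather than the SDK's real code:

```python
# Toy illustration of the pattern in this commit, not SDK code:
# a class that exists only to namespace a @staticmethod ...
class DataRowUploaderStyle:

    @staticmethod
    def upload_in_chunks(items):
        return len(items)


# ... becomes a plain module-level function, which callers reach via
# `import <module> as data_row_uploader` and call directly.
def upload_in_chunks(items):
    return len(items)


assert DataRowUploaderStyle.upload_in_chunks([1, 2]) == upload_in_chunks([1, 2])
```

The call sites in dataset.py keep the same `upload_in_chunks(...)` shape; only the import style changes.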

libs/labelbox/src/labelbox/schema/internal/data_row_uploader.py

Lines changed: 12 additions & 16 deletions
```diff
@@ -13,22 +13,18 @@ class UploadManifest(pydantic_compat.BaseModel):
     chunk_uris: List[str]
 
 
-class DataRowUploader:
+def upload_in_chunks(client, specs: List[DataRowUpsertItem],
+                     file_upload_thread_count: int,
+                     max_chunk_size_bytes: int) -> UploadManifest:
+    empty_specs = list(filter(lambda spec: spec.is_empty(), specs))
 
-    @staticmethod
-    def upload_in_chunks(client, specs: List[DataRowUpsertItem],
-                         file_upload_thread_count: int,
-                         max_chunk_size_bytes: int) -> UploadManifest:
-        empty_specs = list(filter(lambda spec: spec.is_empty(), specs))
+    if empty_specs:
+        ids = list(map(lambda spec: spec.id.get("value"), empty_specs))
+        raise ValueError(f"The following items have an empty payload: {ids}")
 
-        if empty_specs:
-            ids = list(map(lambda spec: spec.id.get("value"), empty_specs))
-            raise ValueError(
-                f"The following items have an empty payload: {ids}")
+    chunk_uris = DescriptorFileCreator(client).create(
+        specs, max_chunk_size_bytes=max_chunk_size_bytes)
 
-        chunk_uris = DescriptorFileCreator(client).create(
-            specs, max_chunk_size_bytes=max_chunk_size_bytes)
-
-        return UploadManifest(source="SDK",
-                              item_count=len(specs),
-                              chunk_uris=chunk_uris)
+    return UploadManifest(source="SDK",
+                          item_count=len(specs),
+                          chunk_uris=chunk_uris)
```
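
For readers skimming the diff, the core of the new module-level function is the empty-payload guard. Below is a hedged, runnable sketch of just that guard; `StubSpec` is a hypothetical stand-in for `DataRowUpsertItem`, assuming only that specs expose `is_empty()` and a dict-shaped `id`:

```python
from dataclasses import dataclass, field
from typing import List


@dataclass
class StubSpec:
    # Hypothetical stand-in for DataRowUpsertItem; not the SDK class.
    id: dict = field(default_factory=dict)
    payload: dict = field(default_factory=dict)

    def is_empty(self) -> bool:
        return not self.payload


def validate_specs(specs: List[StubSpec]) -> None:
    # Mirrors the filter/map/raise sequence in upload_in_chunks above.
    empty_specs = list(filter(lambda spec: spec.is_empty(), specs))
    if empty_specs:
        ids = list(map(lambda spec: spec.id.get("value"), empty_specs))
        raise ValueError(f"The following items have an empty payload: {ids}")


validate_specs([StubSpec(id={"value": "a"}, payload={"row_data": "..."})])  # passes
try:
    validate_specs([StubSpec(id={"value": "b"})])  # empty payload
except ValueError as err:
    print(err)  # The following items have an empty payload: ['b']
```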

libs/labelbox/src/labelbox/schema/internal/descriptor_file_creator.py

Lines changed: 5 additions & 15 deletions
```diff
@@ -28,10 +28,7 @@ class DescriptorFileCreator:
 
     Args:
         client (Client): The client object
-        is_upsert (bool): Whether the upload is an upsert. This is a legacy parameter and should always be True because this class will only support upsert
         max_chunk_size_bytes (int): The maximum size of the file in bytes
-
-    TODO: Remove is_upsert parameter
     """
 
     def __init__(self, client: "Client"):
@@ -56,7 +53,7 @@ def create(self,
         is_upsert = True  # This class will only support upsert use cases
         items = self._prepare_items_for_upload(items,
                                                max_attachments_per_data_row,
-                                               is_upsert)
+                                               is_upsert=is_upsert)
         json_chunks = self._chunk_down_by_bytes(items, max_chunk_size_bytes)
         with ThreadPoolExecutor(FILE_UPLOAD_THREAD_COUNT) as executor:
             futures = [
@@ -66,14 +63,11 @@ def create(self,
             ]
             return [future.result() for future in as_completed(futures)]
 
-    def create_one(self,
-                   items,
-                   max_attachments_per_data_row=None,
-                   is_upsert=False) -> List[str]:
+    def create_one(self, items, max_attachments_per_data_row=None) -> List[str]:
         items = self._prepare_items_for_upload(items,
                                                max_attachments_per_data_row,
                                                is_upsert)
-        # Prepare and upload the desciptor file
+        # Prepare and upload the descriptor file
         data = json.dumps(items)
         return self.client.upload_data(data,
                                        content_type="application/json",
@@ -84,8 +78,7 @@ def _prepare_items_for_upload(self,
                                   max_attachments_per_data_row=None,
                                   is_upsert=False):
         """
-        This function is shared by `Dataset.create_data_rows`, `Dataset.create_data_rows_sync` and `Dataset.update_data_rows`.
-        It is used to prepare the input file. The user defined input is validated, processed, and json stringified.
+        This function is used to prepare the input file. The user defined input is validated, processed, and json stringified.
         Finally the json data is uploaded to gcs and a uri is returned. This uri can be passed as a parameter to a mutation that uploads data rows
 
         Each element in `items` can be either a `str` or a `dict`. If
@@ -109,9 +102,6 @@ def _prepare_items_for_upload(self,
         >>>     {DataRow.row_data: {"type" : ..., 'version' : ..., 'messages' : [...]}}
         >>> ])
 
-        For an example showing how to upload tiled data_rows see the following notebook:
-        https://github.com/Labelbox/labelbox-python/blob/ms/develop/model_assisted_labeling/tiled_imagery_mal.ipynb
-
         Args:
             items (iterable of (dict or str)): See above for details.
             max_attachments_per_data_row (Optional[int]): Param used during attachment validation to determine
@@ -305,7 +295,7 @@ def _chunk_down_by_bytes(self, items: List[dict],
                              max_chunk_size: int) -> Generator[str, None, None]:
         """
         Recursively chunks down a list of items into smaller lists until each list is less than or equal to max_chunk_size bytes
-        NOTE: of one data row is large than max_chunk_size, it will be returned as one chunk
+        NOTE: if one data row is larger than max_chunk_size, it will be returned as one chunk
 
         Returns:
             Generator[str, None, None]: A generator that yields a json string
```
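
The corrected NOTE describes the chunking contract of `_chunk_down_by_bytes`. A hedged sketch of that contract (an assumption-laden stand-in, not the SDK's implementation): split the list recursively until each JSON-serialized chunk fits in `max_chunk_size` bytes, and yield a single oversized row as its own chunk.

```python
import json
from typing import Generator, List


def chunk_down_by_bytes(items: List[dict],
                        max_chunk_size: int) -> Generator[str, None, None]:
    # Serialize the whole list; yield it if it fits or cannot be split
    # further. A single row larger than max_chunk_size is therefore
    # still yielded as one chunk, matching the NOTE above.
    data = json.dumps(items)
    if len(data) <= max_chunk_size or len(items) <= 1:
        yield data
        return
    # Otherwise split in half and recurse on each half.
    half = len(items) // 2
    yield from chunk_down_by_bytes(items[:half], max_chunk_size)
    yield from chunk_down_by_bytes(items[half:], max_chunk_size)


rows = [{"row_data": "x" * 40} for _ in range(4)]
for chunk in chunk_down_by_bytes(rows, max_chunk_size=120):
    print(len(chunk))  # two chunks, two rows each
```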

libs/labelbox/tests/integration/test_data_rows.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -2,7 +2,6 @@
 import uuid
 from datetime import datetime
 import json
-import math
 import requests
 import os
 
```