
Commit 2aa342b

Author: Val Brodsky
Commit message: PR feedback
1 parent 69efe35

4 files changed (+20, -35 lines)


libs/labelbox/src/labelbox/schema/dataset.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -33,7 +33,7 @@
 from labelbox.schema.user import User
 from labelbox.schema.iam_integration import IAMIntegration
 from labelbox.schema.internal.data_row_upsert_item import (DataRowUpsertItem)
-from labelbox.schema.internal.data_row_uploader import DataRowUploader
+import labelbox.schema.internal.data_row_uploader as data_row_uploader
 from labelbox.schema.internal.descriptor_file_creator import DescriptorFileCreator
 from labelbox.schema.internal.datarow_upload_constants import (
     FILE_UPLOAD_THREAD_COUNT, UPSERT_CHUNK_SIZE_BYTES)
@@ -262,7 +262,7 @@ def create_data_rows(self,
         Use this instead of `Dataset.create_data_rows_sync` uploads for batches that contain more than 1000 data rows.
 
         Args:
-            items (iterable of (dict or str)): See the docstring for `DataRowUploader.create_descriptor_file` for more information
+            items (iterable of (dict or str))
 
         Returns:
             Task representing the data import on the server side. The Task
@@ -617,7 +617,7 @@ def _exec_upsert_data_rows(
             file_upload_thread_count: int = FILE_UPLOAD_THREAD_COUNT
     ) -> "DataUpsertTask":
 
-        manifest = DataRowUploader.upload_in_chunks(
+        manifest = data_row_uploader.upload_in_chunks(
             client=self.client,
             specs=specs,
             file_upload_thread_count=file_upload_thread_count,
```
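
The dataset.py change above swaps a class-scoped entry point for a module-level one. A minimal, self-contained sketch of that refactor pattern, with toy names rather than the SDK's real code:

```python
# Toy illustration of the pattern in this commit, not SDK code:
# a class that exists only to namespace a @staticmethod ...
class DataRowUploaderStyle:

    @staticmethod
    def upload_in_chunks(items):
        return len(items)


# ... becomes a plain module-level function, which callers reach via
# `import <module> as data_row_uploader` and call directly.
def upload_in_chunks(items):
    return len(items)


assert DataRowUploaderStyle.upload_in_chunks([1, 2]) == upload_in_chunks([1, 2])
```

The call sites in dataset.py keep the same `upload_in_chunks(...)` shape; only the import style changes.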

libs/labelbox/src/labelbox/schema/internal/data_row_uploader.py

Lines changed: 12 additions & 16 deletions
```diff
@@ -13,22 +13,18 @@ class UploadManifest(pydantic_compat.BaseModel):
     chunk_uris: List[str]
 
 
-class DataRowUploader:
+def upload_in_chunks(client, specs: List[DataRowUpsertItem],
+                     file_upload_thread_count: int,
+                     max_chunk_size_bytes: int) -> UploadManifest:
+    empty_specs = list(filter(lambda spec: spec.is_empty(), specs))
 
-    @staticmethod
-    def upload_in_chunks(client, specs: List[DataRowUpsertItem],
-                         file_upload_thread_count: int,
-                         max_chunk_size_bytes: int) -> UploadManifest:
-        empty_specs = list(filter(lambda spec: spec.is_empty(), specs))
+    if empty_specs:
+        ids = list(map(lambda spec: spec.id.get("value"), empty_specs))
+        raise ValueError(f"The following items have an empty payload: {ids}")
 
-        if empty_specs:
-            ids = list(map(lambda spec: spec.id.get("value"), empty_specs))
-            raise ValueError(
-                f"The following items have an empty payload: {ids}")
+    chunk_uris = DescriptorFileCreator(client).create(
+        specs, max_chunk_size_bytes=max_chunk_size_bytes)
 
-        chunk_uris = DescriptorFileCreator(client).create(
-            specs, max_chunk_size_bytes=max_chunk_size_bytes)
-
-        return UploadManifest(source="SDK",
-                              item_count=len(specs),
-                              chunk_uris=chunk_uris)
+    return UploadManifest(source="SDK",
+                          item_count=len(specs),
+                          chunk_uris=chunk_uris)
```
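
For readers skimming the diff, the core of the new module-level function is the empty-payload guard. Below is a hedged, runnable sketch of just that guard; `StubSpec` is a hypothetical stand-in for `DataRowUpsertItem`, assuming only that specs expose `is_empty()` and a dict-shaped `id`:

```python
from dataclasses import dataclass, field
from typing import List


@dataclass
class StubSpec:
    # Hypothetical stand-in for DataRowUpsertItem; not the SDK class.
    id: dict = field(default_factory=dict)
    payload: dict = field(default_factory=dict)

    def is_empty(self) -> bool:
        return not self.payload


def validate_specs(specs: List[StubSpec]) -> None:
    # Mirrors the filter/map/raise sequence in upload_in_chunks above.
    empty_specs = list(filter(lambda spec: spec.is_empty(), specs))
    if empty_specs:
        ids = list(map(lambda spec: spec.id.get("value"), empty_specs))
        raise ValueError(f"The following items have an empty payload: {ids}")


validate_specs([StubSpec(id={"value": "a"}, payload={"row_data": "..."})])  # passes
try:
    validate_specs([StubSpec(id={"value": "b"})])  # empty payload
except ValueError as err:
    print(err)  # The following items have an empty payload: ['b']
```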

libs/labelbox/src/labelbox/schema/internal/descriptor_file_creator.py

Lines changed: 5 additions & 15 deletions
```diff
@@ -28,10 +28,7 @@ class DescriptorFileCreator:
 
     Args:
         client (Client): The client object
-        is_upsert (bool): Whether the upload is an upsert. This is a legacy parameter and should always be True because this class will only support upsert
         max_chunk_size_bytes (int): The maximum size of the file in bytes
-
-    TODO: Remove is_upsert parameter
     """
 
     def __init__(self, client: "Client"):
@@ -56,7 +53,7 @@ def create(self,
         is_upsert = True  # This class will only support upsert use cases
         items = self._prepare_items_for_upload(items,
                                                max_attachments_per_data_row,
-                                               is_upsert)
+                                               is_upsert=is_upsert)
         json_chunks = self._chunk_down_by_bytes(items, max_chunk_size_bytes)
         with ThreadPoolExecutor(FILE_UPLOAD_THREAD_COUNT) as executor:
             futures = [
@@ -66,14 +63,11 @@ def create(self,
             ]
             return [future.result() for future in as_completed(futures)]
 
-    def create_one(self,
-                   items,
-                   max_attachments_per_data_row=None,
-                   is_upsert=False) -> List[str]:
+    def create_one(self, items, max_attachments_per_data_row=None) -> List[str]:
         items = self._prepare_items_for_upload(items,
                                                max_attachments_per_data_row,
                                                is_upsert)
-        # Prepare and upload the desciptor file
+        # Prepare and upload the descriptor file
         data = json.dumps(items)
         return self.client.upload_data(data,
                                        content_type="application/json",
@@ -84,8 +78,7 @@ def _prepare_items_for_upload(self,
                                   max_attachments_per_data_row=None,
                                   is_upsert=False):
         """
-        This function is shared by `Dataset.create_data_rows`, `Dataset.create_data_rows_sync` and `Dataset.update_data_rows`.
-        It is used to prepare the input file. The user defined input is validated, processed, and json stringified.
+        This function is used to prepare the input file. The user defined input is validated, processed, and json stringified.
         Finally the json data is uploaded to gcs and a uri is returned. This uri can be passed as a parameter to a mutation that uploads data rows
 
         Each element in `items` can be either a `str` or a `dict`. If
@@ -109,9 +102,6 @@ def _prepare_items_for_upload(self,
         >>>     {DataRow.row_data: {"type" : ..., 'version' : ..., 'messages' : [...]}}
         >>> ])
 
-        For an example showing how to upload tiled data_rows see the following notebook:
-        https://github.com/Labelbox/labelbox-python/blob/ms/develop/model_assisted_labeling/tiled_imagery_mal.ipynb
-
         Args:
             items (iterable of (dict or str)): See above for details.
             max_attachments_per_data_row (Optional[int]): Param used during attachment validation to determine
@@ -305,7 +295,7 @@ def _chunk_down_by_bytes(self, items: List[dict],
                              max_chunk_size: int) -> Generator[str, None, None]:
         """
         Recursively chunks down a list of items into smaller lists until each list is less than or equal to max_chunk_size bytes
-        NOTE: of one data row is large than max_chunk_size, it will be returned as one chunk
+        NOTE: if one data row is larger than max_chunk_size, it will be returned as one chunk
 
         Returns:
             Generator[str, None, None]: A generator that yields a json string
```
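
The corrected NOTE describes the chunking contract of `_chunk_down_by_bytes`. A hedged sketch of that contract (an assumption-laden stand-in, not the SDK's implementation): split the list recursively until each JSON-serialized chunk fits in `max_chunk_size` bytes, and yield a single oversized row as its own chunk.

```python
import json
from typing import Generator, List


def chunk_down_by_bytes(items: List[dict],
                        max_chunk_size: int) -> Generator[str, None, None]:
    # Serialize the whole list; yield it if it fits or cannot be split
    # further. A single row larger than max_chunk_size is therefore
    # still yielded as one chunk, matching the NOTE above.
    data = json.dumps(items)
    if len(data) <= max_chunk_size or len(items) <= 1:
        yield data
        return
    # Otherwise split in half and recurse on each half.
    half = len(items) // 2
    yield from chunk_down_by_bytes(items[:half], max_chunk_size)
    yield from chunk_down_by_bytes(items[half:], max_chunk_size)


rows = [{"row_data": "x" * 40} for _ in range(4)]
for chunk in chunk_down_by_bytes(rows, max_chunk_size=120):
    print(len(chunk))  # two chunks, two rows each
```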

libs/labelbox/tests/integration/test_data_rows.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -2,7 +2,6 @@
 import uuid
 from datetime import datetime
 import json
-import math
 import requests
 import os
 
```