@@ -28,26 +28,22 @@ class DescriptorFileCreator:
28
28
29
29
Args:
30
30
client (Client): The client object
31
- is_upsert (bool): Whether the upload is an upsert. This is a legacy parameter and should always be True because this class will only support upsert
32
31
max_chunk_size_bytes (int): The maximum size of the file in bytes
33
-
34
- TODO: Remove is_upsert parameter
35
32
"""
36
33
37
34
def __init__ (self , client : "Client" ):
38
35
self .client = client
36
+ """
37
+ This method is used to convert a list to json and upload it in a file to gcs.
38
+ It will create multiple files if the size of the upload is greater than max_chunk_size_bytes (in bytes).
39
+ It uploads the files to gcs in parallel and returns a list of urls.
39
40
40
- """"
41
- This method is used to convert a list to json and upload it in a file to gcs.
42
- It will create multiple files if the size of upload is greater than max_chunk_size_bytes in bytes,
43
- It uploads the files to gcs in parallel, and return a list of urls
44
-
45
- Args:
46
- items: The list to upload
47
- is_upsert (bool): Whether the upload is an upsert
48
- max_attachments_per_data_row (int): The maximum number of attachments per data row
49
- max_chunk_size_bytes (int): The maximum size of the file in bytes
50
- """
41
+ Args:
42
+ items: The list to upload
43
+ is_upsert (bool): Whether the upload is an upsert
44
+ max_attachments_per_data_row (int): The maximum number of attachments per data row
45
+ max_chunk_size_bytes (int): The maximum size of the file in bytes
46
+ """
51
47
52
48
def create (self ,
53
49
items ,
@@ -56,7 +52,7 @@ def create(self,
56
52
is_upsert = True # This class will only support upsert use cases
57
53
items = self ._prepare_items_for_upload (items ,
58
54
max_attachments_per_data_row ,
59
- is_upsert )
55
+ is_upsert = is_upsert )
60
56
json_chunks = self ._chunk_down_by_bytes (items , max_chunk_size_bytes )
61
57
with ThreadPoolExecutor (FILE_UPLOAD_THREAD_COUNT ) as executor :
62
58
futures = [
@@ -66,14 +62,10 @@ def create(self,
66
62
]
67
63
return [future .result () for future in as_completed (futures )]
68
64
69
- def create_one (self ,
70
- items ,
71
- max_attachments_per_data_row = None ,
72
- is_upsert = False ) -> List [str ]:
65
+ def create_one (self , items , max_attachments_per_data_row = None ) -> List [str ]:
73
66
items = self ._prepare_items_for_upload (items ,
74
- max_attachments_per_data_row ,
75
- is_upsert )
76
- # Prepare and upload the desciptor file
67
+ max_attachments_per_data_row )
68
+ # Prepare and upload the descriptor file
77
69
data = json .dumps (items )
78
70
return self .client .upload_data (data ,
79
71
content_type = "application/json" ,
@@ -84,8 +76,7 @@ def _prepare_items_for_upload(self,
84
76
max_attachments_per_data_row = None ,
85
77
is_upsert = False ):
86
78
"""
87
- This function is shared by `Dataset.create_data_rows`, `Dataset.create_data_rows_sync` and `Dataset.update_data_rows`.
88
- It is used to prepare the input file. The user defined input is validated, processed, and json stringified.
79
+ This function is used to prepare the input file. The user-defined input is validated, processed, and json stringified.
89
80
Finally the json data is uploaded to gcs and a uri is returned. This uri can be passed as a parameter to a mutation that uploads data rows
90
81
91
82
Each element in `items` can be either a `str` or a `dict`. If
@@ -109,9 +100,6 @@ def _prepare_items_for_upload(self,
109
100
>>> {DataRow.row_data: {"type" : ..., 'version' : ..., 'messages' : [...]}}
110
101
>>> ])
111
102
112
- For an example showing how to upload tiled data_rows see the following notebook:
113
- https://github.com/Labelbox/labelbox-python/blob/ms/develop/model_assisted_labeling/tiled_imagery_mal.ipynb
114
-
115
103
Args:
116
104
items (iterable of (dict or str)): See above for details.
117
105
max_attachments_per_data_row (Optional[int]): Param used during attachment validation to determine
@@ -305,7 +293,7 @@ def _chunk_down_by_bytes(self, items: List[dict],
305
293
max_chunk_size : int ) -> Generator [str , None , None ]:
306
294
"""
307
295
Recursively chunks down a list of items into smaller lists until each list is less than or equal to max_chunk_size bytes
308
- NOTE: of one data row is large than max_chunk_size, it will be returned as one chunk
296
+ NOTE: if one data row is larger than max_chunk_size, it will be returned as one chunk
309
297
310
298
Returns:
311
299
Generator[str, None, None]: A generator that yields a json string
0 commit comments