 import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
-from typing import Iterable, List, Union
+from typing import Iterable, List
 
 from labelbox.exceptions import InvalidQueryError
 from labelbox.exceptions import InvalidAttributeError
 from labelbox.orm.model import Field
 from labelbox.schema.embedding import EmbeddingVector
 from labelbox.pydantic_compat import BaseModel
-from labelbox.schema.internal.datarow_upload_constants import MAX_DATAROW_PER_API_OPERATION
+from labelbox.schema.internal.datarow_upload_constants import (
+    MAX_DATAROW_PER_API_OPERATION, FILE_UPLOAD_THREAD_COUNT)
 from labelbox.schema.internal.data_row_upsert_item import DataRowUpsertItem
 
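The new `FILE_UPLOAD_THREAD_COUNT` import replaces a hardcoded thread count later in this diff. For orientation, the constants module presumably looks roughly like the sketch below; the value 20 is implied by the literal this commit removes, and the other value is an assumption, not shown in this diff.

# labelbox/schema/internal/datarow_upload_constants.py -- sketch, not the actual file
MAX_DATAROW_PER_API_OPERATION = 150_000  # assumed value; not visible in this diff
FILE_UPLOAD_THREAD_COUNT = 20            # matches the hardcoded literal removed below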
@@ -31,15 +32,15 @@ def create_descriptor_file(client,
     """
     This function is shared by `Dataset.create_data_rows`, `Dataset.create_data_rows_sync` and `Dataset.update_data_rows`.
     It is used to prepare the input file. The user defined input is validated, processed, and json stringified.
-    Finally the json data is uploaded to gcs and a uri is returned. This uri can be passed to
+    Finally the json data is uploaded to gcs and a uri is returned. This uri can be passed as a parameter to a mutation that uploads data rows
 
     Each element in `items` can be either a `str` or a `dict`. If
     it is a `str`, then it is interpreted as a local file path. The file
     is uploaded to Labelbox and a DataRow referencing it is created.
 
     If an item is a `dict`, then it could support one of the two following structures
         1. For static imagery, video, and text it should map `DataRow` field names to values.
-           At the minimum an `item` passed as a `dict` must contain a `row_data` key and value.
+           At the minimum an `items` passed as a `dict` must contain a `row_data` key and value.
            If the value for row_data is a local file path and the path exists,
            then the local file will be uploaded to labelbox.
 
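To make the accepted `items` shapes concrete, a payload mixing both forms might look like the following sketch; the paths, URL, and key values are invented for illustration.

# Hypothetical input for create_descriptor_file / Dataset.create_data_rows.
items = [
    "/tmp/frames/001.jpg",  # str: treated as a local file path and uploaded
    {
        "row_data": "/tmp/frames/002.jpg",  # dict: local path is detected and uploaded
        "global_key": "frame-002",          # optional identifying field
    },
    {
        "row_data": "https://example.com/003.jpg",  # dict: remote URL passed through
        "external_id": "frame-003",
    },
]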
@@ -73,7 +74,7 @@ def create_descriptor_file(client,
             a DataRow.
         ValueError: When the upload parameters are invalid
     """
-    file_upload_thread_count = 20
+    file_upload_thread_count = FILE_UPLOAD_THREAD_COUNT
     DataRow = Entity.DataRow
     AssetAttachment = Entity.AssetAttachment
 
@@ -184,7 +185,7 @@ def validate_keys(item):
             raise InvalidAttributeError(DataRow, invalid_keys)
         return item
 
-    def formatLegacyConversationalData(item):
+    def format_legacy_conversational_data(item):
         messages = item.pop("conversationalData")
         version = item.pop("version", 1)
         type = item.pop("type", "application/vnd.labelbox.conversational")
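The renamed helper pops the legacy keys visible in the context lines above; a payload it would normalize could look like this sketch, where only the popped key names are grounded in the diff and the values are invented.

# Hypothetical legacy conversational item handled by format_legacy_conversational_data.
item = {
    "row_data": "...",  # remaining fields follow the usual dict path
    "conversationalData": [{"messageId": "m1", "content": "hello"}],
    "version": 1,
    "type": "application/vnd.labelbox.conversational",
}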
@@ -215,7 +216,7 @@ def convert_item(data_row_item):
             return item
 
         if "conversationalData" in item:
-            formatLegacyConversationalData(item)
+            format_legacy_conversational_data(item)
 
         # Convert all payload variations into the same dict format
         item = format_row(item)
@@ -270,9 +271,9 @@ def upload_in_chunks(client, specs: List[DataRowUpsertItem],
         for i in range(0, len(specs), upsert_chunk_size)
     ]
 
-    def _upload_chunk(_chunk):
+    def _upload_chunk(chunk):
         return DataRowUploader.create_descriptor_file(client,
-                                                      _chunk,
+                                                      chunk,
                                                       is_upsert=True)
 
     with ThreadPoolExecutor(file_upload_thread_count) as executor:
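Since only a slice of `upload_in_chunks` appears in this hunk, here is a self-contained sketch of the chunk-and-thread-pool pattern it uses; the function and parameter names are stand-ins, with `len` standing in for `_upload_chunk` in the toy usage.

from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, List, Sequence, TypeVar

T = TypeVar("T")
R = TypeVar("R")

def upload_in_chunks_sketch(specs: Sequence[T],
                            upload_one: Callable[[List[T]], R],
                            chunk_size: int,
                            thread_count: int) -> List[R]:
    # Split the work into fixed-size chunks, mirroring the comprehension above.
    chunks = [list(specs[i:i + chunk_size])
              for i in range(0, len(specs), chunk_size)]
    with ThreadPoolExecutor(thread_count) as executor:
        futures = [executor.submit(upload_one, chunk) for chunk in chunks]
        # as_completed yields results in completion order, not submission order.
        return [f.result() for f in as_completed(futures)]

# Toy usage: "uploading" a chunk just reports its size here.
print(upload_in_chunks_sketch(list(range(10)), len, chunk_size=3, thread_count=4))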