15
15
from io import StringIO
16
16
import requests
17
17
18
- from labelbox .exceptions import InvalidQueryError , LabelboxError , ResourceNotFoundError , InvalidAttributeError
18
+ from labelbox .exceptions import InvalidQueryError , LabelboxError , ResourceNotFoundError , ResourceCreationError
19
19
from labelbox .orm .comparison import Comparison
20
20
from labelbox .orm .db_object import DbObject , Updateable , Deletable , experimental
21
21
from labelbox .orm .model import Entity , Field , Relationship
@@ -214,7 +214,10 @@ def convert_field_keys(items):
214
214
res = self .client .execute (query_str , {** args , 'dataset' : self .uid })
215
215
return DataRow (self .client , res ['createDataRow' ])
216
216
217
- def create_data_rows_sync (self , items ) -> None :
217
+ def create_data_rows_sync (
218
+ self ,
219
+ items ,
220
+ file_upload_thread_count = FILE_UPLOAD_THREAD_COUNT ) -> None :
218
221
""" Synchronously bulk upload data rows.
219
222
220
223
Use this instead of `Dataset.create_data_rows` for smaller batches of data rows that need to be uploaded quickly.
@@ -228,6 +231,7 @@ def create_data_rows_sync(self, items) -> None:
228
231
None. If the function doesn't raise an exception then the import was successful.
229
232
230
233
Raises:
234
+ ResourceCreationError: Errors in data row upload
231
235
InvalidQueryError: If the `items` parameter does not conform to
232
236
the specification in Dataset._create_descriptor_file or if the server did not accept the
233
237
DataRow creation request (unknown reason).
@@ -242,18 +246,25 @@ def create_data_rows_sync(self, items) -> None:
242
246
f"Dataset.create_data_rows_sync() supports a max of { max_data_rows_supported } data rows."
243
247
" For larger imports use the async function Dataset.create_data_rows()"
244
248
)
245
- descriptor_url = DescriptorFileCreator (self .client ).create_one (
246
- items , max_attachments_per_data_row = max_attachments_per_data_row )
247
- dataset_param = "datasetId"
248
- url_param = "jsonUrl"
249
- query_str = """mutation AppendRowsToDatasetSyncPyApi($%s: ID!, $%s: String!){
250
- appendRowsToDatasetSync(data:{datasetId: $%s, jsonFileUrl: $%s}
251
- ){dataset{id}}} """ % (dataset_param , url_param , dataset_param ,
252
- url_param )
253
- self .client .execute (query_str , {
254
- dataset_param : self .uid ,
255
- url_param : descriptor_url
256
- })
249
+ if file_upload_thread_count < 1 :
250
+ raise ValueError (
251
+ "file_upload_thread_count must be a positive integer" )
252
+
253
+ upload_items = self ._separate_and_process_items (items )
254
+ specs = DataRowCreateItem .build (self .uid , upload_items )
255
+ task : DataUpsertTask = self ._exec_upsert_data_rows (
256
+ specs , file_upload_thread_count )
257
+ task .wait_till_done ()
258
+
259
+ if task .has_errors ():
260
+ raise ResourceCreationError (
261
+ f"Data row upload errors: { task .errors } " , cause = task .uid )
262
+ if task .status != "COMPLETE" :
263
+ raise ResourceCreationError (
264
+ f"Data row upload did not complete, task status { task .status } task id { task .uid } "
265
+ )
266
+
267
+ return None
257
268
258
269
def create_data_rows (self ,
259
270
items ,
@@ -287,14 +298,18 @@ def create_data_rows(self,
287
298
raise ValueError (
288
299
"file_upload_thread_count must be a positive integer" )
289
300
301
+ # Normalize the raw items into dict specs, then build upsert specs from them.
302
+ upload_items = self ._separate_and_process_items (items )
303
+ specs = DataRowCreateItem .build (self .uid , upload_items )
304
+ return self ._exec_upsert_data_rows (specs , file_upload_thread_count )
305
+
306
+ def _separate_and_process_items (self , items ):
290
307
string_items = [item for item in items if isinstance (item , str )]
291
308
dict_items = [item for item in items if isinstance (item , dict )]
292
309
dict_string_items = []
293
310
if len (string_items ) > 0 :
294
311
dict_string_items = self ._build_from_local_paths (string_items )
295
- specs = DataRowCreateItem .build (self .uid ,
296
- dict_items + dict_string_items )
297
- return self ._exec_upsert_data_rows (specs , file_upload_thread_count )
312
+ return dict_items + dict_string_items
298
313
299
314
def _build_from_local_paths (
300
315
self ,
0 commit comments