@@ -38,26 +38,18 @@ def create_data_row_upload_dict(client:labelboxClient, table:pd.core.frame.DataFrame,
         dataset_to_global_key_to_upload_dict = {dataset_id : {}}
     else:
         dataset_to_global_key_to_upload_dict = {id : {} for id in connector.get_unique_values_function(table=table)}
+    df_dict = table.to_dict('records')
     with ThreadPoolExecutor(max_workers=8) as exc:
-        global_key_to_upload_dict = {}
         errors = []
         futures = []
         if verbose:
             print(f'Submitting data rows...')
-            for index, row in tqdm(table.iterrows()):
+            for row_dict in tqdm(df_dict):
                 futures.append(exc.submit(
-                    create_data_rows, client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
+                    create_data_rows, client, row_dict, metadata_name_key_to_schema, metadata_schema_to_name_key,
                     row_data_col, global_key_col, external_id_col, dataset_id_col,
                     dataset_id, metadata_index, attachment_index, divider
                 ))
-        else:
-            for index, row in table.iterrows():
-                futures.append(exc.submit(
-                    create_data_rows, client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
-                    row_data_col, global_key_col, external_id_col, dataset_id_col,
-                    dataset_id, metadata_index, attachment_index, divider
-                ))
-        if verbose:
             print(f'Processing data rows...')
             for f in tqdm(as_completed(futures)):
                 res = f.result()
@@ -67,8 +59,14 @@ def create_data_row_upload_dict(client:labelboxClient, table:pd.core.frame.DataFrame,
-                    id = str(list(res.keys()))[0]
+                    id = str(list(res["res"].keys())[0])
                     data_row_dict = res["res"][id]
                     global_key = str(data_row_dict["global_key"])
-                    dataset_to_global_key_to_upload_dict[id].update({global_key:data_row_dict})
+                    dataset_to_global_key_to_upload_dict[id].update({global_key:data_row_dict})
         else:
+            for row_dict in df_dict:
+                futures.append(exc.submit(
+                    create_data_rows, client, row_dict, metadata_name_key_to_schema, metadata_schema_to_name_key,
+                    row_data_col, global_key_col, external_id_col, dataset_id_col,
+                    dataset_id, metadata_index, attachment_index, divider
+                ))
             for f in as_completed(futures):
                 res = f.result()
                 if res['error']:
@@ -82,15 +80,15 @@ def create_data_row_upload_dict(client:labelboxClient, table:pd.core.frame.DataFrame,
         print(f'Generated upload list')
-    return global_key_to_upload_dict, errors
+    return dataset_to_global_key_to_upload_dict, errors

-def create_data_rows(client:labelboxClient, row:pandas.core.series.Series,
+def create_data_rows(client:labelboxClient, row_dict:dict,
                      metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict,
                      row_data_col:str, global_key_col:str, external_id_col:str, dataset_id_col:str,
                      dataset_id:str, metadata_index:dict, attachment_index:dict,
                      divider:str):
     """ Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
     Args:
         client                      :   Required (labelbox.client.Client) - Labelbox Client object
-        row                         :   Required (pandas.core.series.Series) - Pandas Series object, corresponds to one row in a df.iterrows()
+        row_dict                    :   Required (dict) - Dictionary where {key=column_name : value=row_value}
         metadata_name_key_to_schema :   Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
         metadata_schema_to_name_key :   Required (dict) - Inverse of metadata_name_key_to_schema
         row_data_col                :   Required (str) - Column containing asset URL or raw text
@@ -108,16 +106,16 @@ def create_data_rows(client:labelboxClient, row:pandas.core.series.Series,
     """
     return_value = {"error" : None, "res" : {}}
     try:
-        id = dataset_id if dataset_id else row["dataset_id_col"]
+        id = dataset_id if dataset_id else row_dict[dataset_id_col]
         return_value["res"] = {id : {}}
-        return_value["res"][id]["row_data"] = str(row[row_data_col])
-        return_value["res"][id]["global_key"] = str(row[global_key_col])
-        return_value["res"][id]["external_id"] = str(row[external_id_col])
+        return_value["res"][id]["row_data"] = str(row_dict[row_data_col])
+        return_value["res"][id]["global_key"] = str(row_dict[global_key_col])
+        return_value["res"][id]["external_id"] = str(row_dict[external_id_col])
         metadata_fields = [{"schema_id" : metadata_name_key_to_schema['lb_integration_source'], "value" : "Pandas"}]
         if metadata_index:
             for metadata_field_name in metadata_index.keys():
                 input_metadata = labelbase.metadata.process_metadata_value(
-                    client=client, metadata_value=row[metadata_field_name], metadata_type=metadata_index[metadata_field_name],
+                    client=client, metadata_value=row_dict[metadata_field_name], metadata_type=metadata_index[metadata_field_name],
                     parent_name=metadata_field_name, metadata_name_key_to_schema=metadata_name_key_to_schema, divider=divider
                 )
                 if input_metadata:
@@ -128,7 +126,7 @@ def create_data_rows(client:labelboxClient, row:pandas.core.series.Series,
         if attachment_index:
             return_value["res"][id]["attachments"] = []
             for column_name in attachment_index:
-                return_value["res"][id]['attachments'].append({"type" : attachment_index[column_name], "value" : row[column_name]})
+                return_value["res"][id]['attachments'].append({"type" : attachment_index[column_name], "value" : row_dict[column_name]})
     except Exception as e:
         return_value["error"] = e
     return return_value
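
For context, a minimal standalone sketch of the pattern this commit adopts: converting the DataFrame to a list of plain dicts with to_dict('records') before fanning rows out to the thread pool, rather than submitting pandas Series objects from iterrows(). This is not the library code; the column names and the build_upload helper are hypothetical stand-ins.

from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd

def build_upload(row_dict: dict) -> dict:
    # Stand-in for create_data_rows(): plain dict lookups instead of Series indexing
    return {"row_data": str(row_dict["url"]), "global_key": str(row_dict["key"])}

table = pd.DataFrame({"url": ["https://x/1.jpg", "https://x/2.jpg"], "key": ["a", "b"]})
df_dict = table.to_dict('records')  # [{'url': ..., 'key': ...}, ...]

with ThreadPoolExecutor(max_workers=8) as exc:
    # Each worker receives a self-contained dict, so no shared DataFrame state
    futures = [exc.submit(build_upload, row_dict) for row_dict in df_dict]
    results = [f.result() for f in as_completed(futures)]

print(results)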