5
5
from tqdm import tqdm
6
6
from concurrent .futures import ThreadPoolExecutor , as_completed
7
7
8
- def create_data_row_upload_dict (client : labelboxClient , table :dict ,
8
+ def create_data_row_upload_dict (table :dict ,
9
9
row_data_col :str , global_key_col :str , external_id_col :str , dataset_id_col :str ,
10
10
dataset_id :str , metadata_index :dict , attachment_index :dict ,
11
11
divider :str , verbose :bool , extra_client :bool = None ):
12
12
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
13
13
Args:
14
- client : Required (labelbox.client.Client) - Labelbox Client object
15
14
table : Required (dict) - Pandas DataFrame as dict with df.to_dict("records")
16
15
row_data_col : Required (str) - Column containing asset URL or raw text
17
16
global_key_col : Required (str) - Column name containing the data row global key - defaults to row data
@@ -47,7 +46,7 @@ def create_data_row_upload_dict(client:labelboxClient, table:dict,
47
46
print (f'Submitting data rows...' )
48
47
for index , row_dict in tqdm (df_dict ):
49
48
futures .append (exc .submit (
50
- create_data_rows , client , row_dict , metadata_name_key_to_schema , metadata_schema_to_name_key ,
49
+ create_data_rows , row_dict , metadata_name_key_to_schema , metadata_schema_to_name_key ,
51
50
row_data_col , global_key_col , external_id_col , dataset_id_col ,
52
51
dataset_id , metadata_index , attachment_index , divider
53
52
))
@@ -64,7 +63,7 @@ def create_data_row_upload_dict(client:labelboxClient, table:dict,
64
63
else :
65
64
for index , row in table .iterrows ():
66
65
futures .append (exc .submit (
67
- create_data_rows , client , row_dict , metadata_name_key_to_schema , metadata_schema_to_name_key ,
66
+ create_data_rows , row_dict , metadata_name_key_to_schema , metadata_schema_to_name_key ,
68
67
row_data_col , global_key_col , external_id_col , dataset_id_col ,
69
68
dataset_id , metadata_index , attachment_index , divider
70
69
))
@@ -81,14 +80,13 @@ def create_data_row_upload_dict(client:labelboxClient, table:dict,
81
80
print (f'Generated upload list' )
82
81
return global_key_to_upload_dict , errors
83
82
84
- def create_data_rows (client : labelboxClient , row_dict :dict ,
83
+ def create_data_rows (row_dict :dict ,
85
84
metadata_name_key_to_schema :dict , metadata_schema_to_name_key :dict ,
86
85
row_data_col :str , global_key_col :str , external_id_col :str , dataset_id_col :str ,
87
86
metadata_index :str , metadata_index :dict , attachment_index :dict ,
88
87
divider :str ):
89
88
""" Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
90
89
Args:
91
- client : Required (labelbox.client.Client) - Labelbox Client object
92
90
row_dict : Required (dict) - Dictionary where {key=column_name : value=row_value}
93
91
metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
94
92
metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
0 commit comments