from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

- def create_data_row_upload_dict(client:labelboxClient, table:pd.core.frame.DataFrame,
+ def create_data_row_upload_dict(client:labelboxClient, table:dict,
                                 row_data_col:str, global_key_col:str, external_id_col:str, dataset_id_col:str,
                                 dataset_id:str, metadata_index:dict, attachment_index:dict,
                                 divider:str, verbose:bool, extra_client:bool=None):
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
13
13
Args:
14
14
client : Required (labelbox.client.Client) - Labelbox Client object
15
- table : Required (pandas.core.frame.DataFrame ) - Pandas DataFrame
15
+ table : Required (dict ) - Pandas DataFrame as dict with df.to_dict("records")
16
16
row_data_col : Required (str) - Column containing asset URL or raw text
17
17
global_key_col : Required (str) - Column name containing the data row global key - defaults to row data
18
18
external_id_col : Required (str) - Column name containing the data row external ID - defaults to global key
@@ -28,17 +28,18 @@ def create_data_row_upload_dict(client:labelboxClient, table:pd.core.frame.DataF
        - global_key_to_upload_dict - Dictionary where {key=global_key : value=data row dictionary in upload format}
        - errors - List of dictionaries containing conversion error information; see connector.create_data_rows() for more information
    """
-   table_length = connector.get_table_length_function(table=table, extra_client=extra_client)
+   table_length = len(table)
    if verbose:
        print(f'Creating upload list - {table_length} rows in Pandas DataFrame')
-   if table_length != connector.get_unique_values_function(table=table, column_name=global_key_col, extra_client=extra_client):
+   unique_global_key_count = len(list(set([str(row_dict[global_key_col]) for row_dict in table])))
+   if table_length != unique_global_key_count:
        print(f"Warning: Your global key column is not unique - upload will resume, only uploading 1 data row per unique global key")
    metadata_schema_to_name_key = labelbase.metadata.get_metadata_schema_to_name_key(client=client, lb_mdo=False, divider=divider, invert=False)
    metadata_name_key_to_schema = labelbase.metadata.get_metadata_schema_to_name_key(client=client, lb_mdo=False, divider=divider, invert=True)
    if dataset_id:
        dataset_to_global_key_to_upload_dict = {dataset_id: {}}
    else:
-       dataset_to_global_key_to_upload_dict = {id: {} for id in connector.get_unique_values_function(table=table)}
+       dataset_to_global_key_to_upload_dict = {id: {} for id in set([str(row_dict[dataset_id_col]) for row_dict in table])}
    df_dict = df.to_dict('records')
    with ThreadPoolExecutor(max_workers=8) as exc:
        errors = []
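
With this change, callers are expected to convert their DataFrame themselves with df.to_dict("records") before invoking the helper. The snippet below is a minimal sketch of that calling convention, not part of this commit: the import path, CSV path, column names, dataset ID, divider value, and the empty metadata/attachment indexes are all placeholder assumptions. Per the docstring, the helper returns an upload dictionary plus a list of conversion errors.

    import labelbox
    import pandas as pd
    # Assumed import location - adjust to wherever create_data_row_upload_dict lives in this repo
    from labelpandas.connector import create_data_row_upload_dict

    client = labelbox.Client(api_key="YOUR_LB_API_KEY")   # placeholder API key
    df = pd.read_csv("data_rows.csv")                      # placeholder source table

    # The function no longer accepts the DataFrame directly - pass df.to_dict("records")
    table = df.to_dict("records")

    # Column names, dataset ID and divider below are illustrative placeholders
    global_key_to_upload_dict, errors = create_data_row_upload_dict(
        client=client, table=table,
        row_data_col="row_data", global_key_col="global_key",
        external_id_col="external_id", dataset_id_col="dataset_id",
        dataset_id="PLACEHOLDER_DATASET_ID", metadata_index={}, attachment_index={},
        divider="///", verbose=True
    )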