4
4
from concurrent .futures import ThreadPoolExecutor , as_completed
5
5
6
6
def create_upload_dict (df :pandas .core .frame .DataFrame , local_files :bool , lb_client :Client , base_client :baseClient , row_data_col :str ,
7
- global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, divider :str = "///" ):
7
+ global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, divider :str = "///" , verbose = False ):
8
8
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
9
9
Args:
10
10
df : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
@@ -16,9 +16,12 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_clie
16
16
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
17
17
metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
18
18
divider : Optional (str) - String delimiter for all name keys generated
19
+ verbose : Optional (bool) - If True, prints information about code execution - defaults to False
19
20
Returns:
20
21
A single dictionary where {key=global_key (as a string) : value=dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys}
21
22
"""
23
+ if verbose :
24
+ print (f'Creating upload list - { len (df )} rows in Pandas DataFrame' )
22
25
global_key_col = global_key_col if global_key_col else row_data_col
23
26
external_id_col = external_id_col if external_id_col else global_key_col
24
27
metadata_schema_to_name_key = base_client .get_metadata_schema_to_name_key (lb_mdo = False , divider = divider , invert = False )
@@ -36,7 +39,9 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_clie
36
39
)
37
40
for f in as_completed (futures ):
38
41
res = f .result ()
39
- global_key_to_upload_dict [str (res ["global_key" ])] = res
42
+ global_key_to_upload_dict [str (res ["global_key" ])] = res
43
+ if verbose :
44
+ print (f'Generated upload list - { len (global_key_to_upload_dict )} data rows to upload' )
40
45
return global_key_to_upload_dict
41
46
42
47
def create_data_rows (local_files :bool , lb_client :Client , row :pandas .core .series .Series ,
@@ -66,7 +71,7 @@ def create_data_rows(local_files:bool, lb_client:Client, row:pandas.core.series.
66
71
for metadata_field_name in metadata_index .keys ():
67
72
name_key = f"{ metadata_field_name } { divider } { row [metadata_field_name ]} "
68
73
value = row [metadata_field_name ] if name_key not in metadata_name_key_to_schema .keys () else metadata_name_key_to_schema [name_key ]
69
- data_row_dict ['metadata_fields' ].append ({"schema_id" : metadata_schema_to_name_key [metadata_field_name ], "value" : value })
74
+ data_row_dict ['metadata_fields' ].append ({"schema_id" : metadata_name_key_to_schema [metadata_field_name ], "value" : value })
70
75
return data_row_dict
71
76
72
77
def get_columns_function (df ):
0 commit comments