2
2
from labelbase import Client as baseClient
3
3
import pandas
4
4
from concurrent .futures import ThreadPoolExecutor , as_completed
5
- import numpy
6
5
7
6
def create_upload_dict (df :pandas .core .frame .DataFrame , local_files :bool , lb_client :Client , base_client :baseClient , row_data_col :str ,
8
7
global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, divider :str = "///" , verbose = False ):
@@ -33,26 +32,27 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_clie
33
32
for index , row in df .iterrows ():
34
33
futures .append (
35
34
exc .submit (
36
- create_data_rows , local_files , lb_client , row ,
35
+ create_data_rows , local_files , lb_client , base_client , row ,
37
36
metadata_name_key_to_schema , metadata_schema_to_name_key ,
38
37
row_data_col , global_key_col , external_id_col , metadata_index , divider
39
38
)
40
39
)
41
40
for f in as_completed (futures ):
42
41
res = f .result ()
42
+ print (res )
43
43
global_key_to_upload_dict [str (res ["global_key" ])] = res
44
44
if verbose :
45
45
print (f'Generated upload list - { len (global_key_to_upload_dict )} data rows to upload' )
46
46
return global_key_to_upload_dict
47
47
48
def create_data_rows(local_files:bool, lb_client:"Client", base_client:"baseClient", row:pandas.core.series.Series,
                     metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict,
                     row_data_col:str, global_key_col:str="", external_id_col:str="", metadata_index:dict={}, divider:str="///"):
    """ Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
    Args:
        local_files                 :   Required (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
        lb_client                   :   Required (labelbox.client.Client) - Labelbox Client object
        base_client                 :   Required (labelbase.client.Client) - Labelbase Client object
        row                         :   Required (pandas.core.series.Series) - Pandas Row object
        metadata_name_key_to_schema :   Required (dict) - Maps metadata name keys to schema IDs (exact key format defined by caller -- TODO confirm)
        metadata_schema_to_name_key :   Required (dict) - Inverse mapping of metadata_name_key_to_schema; not read here, kept for signature parity
        row_data_col                :   Required (str) - Column containing asset URL or file path
        global_key_col              :   Optional (str) - Column name containing the data row global key - defaults to row data
        external_id_col             :   Optional (str) - Column name containing the data row external ID - defaults to global key
        metadata_index              :   Optional (dict) - Maps metadata field name to metadata type (e.g. "enum", "number", "string")
        divider                     :   Optional (str) - String delimiter separating parent and child metadata names
    Returns:
        A dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys
    """
    # NOTE: metadata_index={} is a mutable default; it is only read here, never
    # mutated, so sharing across calls is harmless -- kept for interface parity.
    # For local files, upload the asset first and use the returned URL as row data.
    row_data = lb_client.upload_file(str(row[row_data_col])) if local_files else str(row[row_data_col])
    # Every data row is tagged with the integration source marker.
    metadata_fields = [{"schema_id": metadata_name_key_to_schema['lb_integration_source'], "value": "Pandas"}]
    if metadata_index:
        for metadata_field_name in metadata_index.keys():
            # Delegate type coercion / enum name-key resolution to labelbase.
            metadata_value = base_client.process_metadata_value(
                metadata_value=row[metadata_field_name],
                metadata_type=metadata_index[metadata_field_name],
                parent_name=metadata_field_name,
                metadata_name_key_to_schema=metadata_name_key_to_schema,
                divider=divider
            )
            # Skip empty / unresolvable values (falsy return means "no metadata").
            if metadata_value:
                # Bug fix: original appended the undefined name `value`
                # (NameError at runtime); use the processed metadata_value.
                metadata_fields.append({"schema_id": metadata_name_key_to_schema[metadata_field_name], "value": metadata_value})
    return {"row_data": row_data, "global_key": str(row[global_key_col]), "external_id": str(row[external_id_col]), "metadata_fields": metadata_fields}
def get_columns_function (df ):
91
84
"""Grabs all column names from a Pandas DataFrame
92
85
Args:
0 commit comments