1
- from labelbase import Client as baseClient
1
+ from labelbase . metadata import get_metadata_schema_to_name_key , process_metadata_value
2
2
from labelbox import Client
3
3
import pandas
4
4
from concurrent .futures import ThreadPoolExecutor , as_completed
5
5
from tqdm .autonotebook import tqdm
6
6
import math
7
7
8
- def create_upload_dict (table :pandas .core .frame .DataFrame , lb_client :Client , base_client : baseClient , row_data_col :str ,
8
+ def create_upload_dict (table :pandas .core .frame .DataFrame , lb_client :Client , row_data_col :str ,
9
9
global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, local_files :bool = False ,
10
10
divider :str = "///" , verbose = False ):
11
11
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
12
12
Args:
13
13
table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
14
14
lb_client : Required (labelbox.client.Client) - Labelbox Client object
15
- base_client : Required (labelbase.client.Client) - Labelbase Client object
16
15
row_data_col : Required (str) - Column containing asset URL or file path
17
16
global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
18
17
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
@@ -34,8 +33,8 @@ def create_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Client, base
34
33
print (f"Warning: Your global key column is not unique - upload will resume, only uploading 1 data row for duplicate global keys" )
35
34
global_key_col = global_key_col if global_key_col else row_data_col
36
35
external_id_col = external_id_col if external_id_col else global_key_col
37
- metadata_schema_to_name_key = base_client . get_metadata_schema_to_name_key (lb_mdo = False , divider = divider , invert = False )
38
- metadata_name_key_to_schema = base_client . get_metadata_schema_to_name_key (lb_mdo = False , divider = divider , invert = True )
36
+ metadata_schema_to_name_key = get_metadata_schema_to_name_key (client = lb_client , lb_mdo = False , divider = divider , invert = False )
37
+ metadata_name_key_to_schema = get_metadata_schema_to_name_key (client = lb_client , lb_mdo = False , divider = divider , invert = True )
39
38
with ThreadPoolExecutor (max_workers = 8 ) as exc :
40
39
global_key_to_upload_dict = {}
41
40
errors = []
@@ -44,13 +43,13 @@ def create_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Client, base
44
43
print (f'Submitting data rows...' )
45
44
for index , row in tqdm (table .iterrows ()):
46
45
futures .append (exc .submit (
47
- create_data_rows , lb_client , base_client , row , metadata_name_key_to_schema , metadata_schema_to_name_key ,
46
+ create_data_rows , lb_client , row , metadata_name_key_to_schema , metadata_schema_to_name_key ,
48
47
row_data_col , global_key_col , external_id_col , metadata_index , local_files , divider
49
48
))
50
49
else :
51
50
for index , row in table .iterrows ():
52
51
futures .append (exc .submit (
53
- create_data_rows , lb_client , base_client , row , metadata_name_key_to_schema , metadata_schema_to_name_key ,
52
+ create_data_rows , lb_client , row , metadata_name_key_to_schema , metadata_schema_to_name_key ,
54
53
row_data_col , global_key_col , external_id_col , metadata_index , local_files , divider
55
54
))
56
55
if verbose :
@@ -72,13 +71,12 @@ def create_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Client, base
72
71
print (f'Generated upload list - { len (global_key_to_upload_dict )} data rows to upload' )
73
72
return global_key_to_upload_dict , errors
74
73
75
- def create_data_rows (lb_client :Client , base_client : baseClient , row :pandas .core .series .Series ,
74
+ def create_data_rows (lb_client :Client , row :pandas .core .series .Series ,
76
75
metadata_name_key_to_schema :dict , metadata_schema_to_name_key :dict , row_data_col :str ,
77
76
global_key_col :str , external_id_col :str , metadata_index :dict , local_files :bool , divider :str ):
78
77
""" Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
79
78
Args:
80
79
lb_client : Required (labelbox.client.Client) - Labelbox Client object
81
- base_client : Required (labelbase.client.Client) - Labelbase Client object
82
80
row : Required (pandas.core.series.Series) - Pandas Series object, corresponds to one row yielded by df.iterrows()
83
81
metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
84
82
metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
@@ -104,8 +102,8 @@ def create_data_rows(lb_client:Client, base_client:baseClient, row:pandas.core.s
104
102
metadata_fields = [{"schema_id" : metadata_name_key_to_schema ['lb_integration_source' ], "value" : "Pandas" }]
105
103
if metadata_index :
106
104
for metadata_field_name in metadata_index .keys ():
107
- input_metadata = base_client . process_metadata_value (
108
- metadata_value = row [metadata_field_name ], metadata_type = metadata_index [metadata_field_name ],
105
+ input_metadata = process_metadata_value (
106
+ client = lb_client , metadata_value = row [metadata_field_name ], metadata_type = metadata_index [metadata_field_name ],
109
107
parent_name = metadata_field_name , metadata_name_key_to_schema = metadata_name_key_to_schema , divider = divider
110
108
)
111
109
if input_metadata :
0 commit comments