2
2
from labelbase import Client as baseClient
3
3
import pandas
4
4
from concurrent .futures import ThreadPoolExecutor , as_completed
5
- from google .api_core import retry
6
5
7
- def create_upload_dict (df :pandas .core .frame .DataFrame , local_files : bool , lb_client :Client , base_client :baseClient , row_data_col :str ,
6
+ def create_upload_dict (df :pandas .core .frame .DataFrame , lb_client :Client , base_client :baseClient , row_data_col :str ,
8
7
global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, divider :str = "///" , verbose = False ):
9
8
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
10
9
Args:
11
10
df : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
12
- local_files : Required (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
13
11
lb_client : Required (labelbox.client.Client) - Labelbox Client object
14
12
base_client : Required (labelbase.client.Client) - Labelbase Client object
15
13
row_data_col : Required (str) - Column containing asset URL or file path
@@ -33,36 +31,23 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_clie
33
31
for index , row in df .iterrows ():
34
32
futures .append (
35
33
exc .submit (
36
- create_data_rows , local_files , lb_client , base_client , row ,
34
+ create_data_rows , lb_client , base_client , row ,
37
35
metadata_name_key_to_schema , metadata_schema_to_name_key ,
38
36
row_data_col , global_key_col , external_id_col , metadata_index , divider
39
37
)
40
38
)
41
39
for f in as_completed (futures ):
42
40
res = f .result ()
43
- print (res )
44
41
global_key_to_upload_dict [str (res ["global_key" ])] = res
45
42
if verbose :
46
43
print (f'Generated upload list - { len (global_key_to_upload_dict )} data rows to upload' )
47
- return global_key_to_upload_dict
48
-
49
- @retry .Retry (predicate = retry .if_exception_type (Exception ), deadline = 120. )
50
- def create_file (lb_client , file_path :str ):
51
- """ Wraps lb_client.upload_file() in retry logic
52
- Args:
53
- lb_client : Required (labelbox.client.Client) - Labelbox Client object
54
- file_path : Required (str) - String corresponding to the row data file path
55
- Returns:
56
- Temporary URL to-be-uploaded to Labelbox
57
- """
58
- return lb_client .upload_file (file_path )
44
+ return global_key_to_upload_dict
59
45
60
- def create_data_rows (local_files : bool , lb_client :Client , base_client :baseClient , row :pandas .core .series .Series ,
46
+ def create_data_rows (lb_client :Client , base_client :baseClient , row :pandas .core .series .Series ,
61
47
metadata_name_key_to_schema :dict , metadata_schema_to_name_key :dict ,
62
48
row_data_col :str , global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, divider :str = "///" ):
63
49
""" Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
64
50
Args:
65
- local_files : Required (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
66
51
lb_client : Required (labelbox.client.Client) - Labelbox Client object
67
52
base_client : Required (labelbase.client.Client) - Labelbase Client object
68
53
row_data_col : Required (str) - Column containing asset URL or file path
@@ -75,7 +60,7 @@ def create_data_rows(local_files:bool, lb_client:Client, base_client:baseClient,
75
60
Returns:
76
61
A single dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys
77
62
"""
78
- row_data = create_file ( str ( row [ row_data_col ])) if local_files else str (row [row_data_col ])
63
+ row_data = str (row [row_data_col ])
79
64
metadata_fields = [{"schema_id" : metadata_name_key_to_schema ['lb_integration_source' ], "value" : "Pandas" }]
80
65
if metadata_index :
81
66
for metadata_field_name in metadata_index .keys ():
@@ -87,7 +72,7 @@ def create_data_rows(local_files:bool, lb_client:Client, base_client:baseClient,
87
72
divider = divider
88
73
)
89
74
if metadata_value :
90
- metadata_fields .append ({"schema_id" : metadata_name_key_to_schema [metadata_field_name ], "value" : value })
75
+ metadata_fields .append ({"schema_id" : metadata_name_key_to_schema [metadata_field_name ], "value" : metadata_value })
91
76
else :
92
77
continue
93
78
return {"row_data" :row_data ,"global_key" :str (row [global_key_col ]),"external_id" :str (row [external_id_col ]),"metadata_fields" :metadata_fields }
0 commit comments