Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 5605e94

Browse files
Update connector.py
1 parent e34ad18 commit 5605e94

File tree

1 file changed

+6
-21
lines changed

1 file changed

+6
-21
lines changed

labelpandas/connector.py

Lines changed: 6 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,12 @@
22
from labelbase import Client as baseClient
33
import pandas
44
from concurrent.futures import ThreadPoolExecutor, as_completed
5-
from google.api_core import retry
65

7-
def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_client:Client, base_client:baseClient, row_data_col:str,
6+
def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_client:baseClient, row_data_col:str,
87
global_key_col:str="", external_id_col:str="", metadata_index:dict={}, divider:str="///", verbose=False):
98
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
109
Args:
1110
df : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
12-
local_files : Required (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
1311
lb_client : Required (labelbox.client.Client) - Labelbox Client object
1412
base_client : Required (labelbase.client.Client) - Labelbase Client object
1513
row_data_col : Required (str) - Column containing asset URL or file path
@@ -33,36 +31,23 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_clie
3331
for index, row in df.iterrows():
3432
futures.append(
3533
exc.submit(
36-
create_data_rows, local_files, lb_client, base_client, row,
34+
create_data_rows, lb_client, base_client, row,
3735
metadata_name_key_to_schema, metadata_schema_to_name_key,
3836
row_data_col, global_key_col, external_id_col, metadata_index, divider
3937
)
4038
)
4139
for f in as_completed(futures):
4240
res = f.result()
43-
print(res)
4441
global_key_to_upload_dict[str(res["global_key"])] = res
4542
if verbose:
4643
print(f'Generated upload list - {len(global_key_to_upload_dict)} data rows to upload')
47-
return global_key_to_upload_dict
48-
49-
@retry.Retry(predicate=retry.if_exception_type(Exception), deadline=120.)
50-
def create_file(lb_client, file_path:str):
51-
""" Wraps lb_client.upload_file() in retry logic
52-
Args:
53-
lb_client : Required (labelbox.client.Client) - Labelbox Client object
54-
file_path : Required (str) - String corresponding to the row data file path
55-
Returns:
56-
Temporary URL to-be-uploaded to Labelbox
57-
"""
58-
return lb_client.upload_file(file_path)
44+
return global_key_to_upload_dict
5945

60-
def create_data_rows(local_files:bool, lb_client:Client, base_client:baseClient, row:pandas.core.series.Series,
46+
def create_data_rows(lb_client:Client, base_client:baseClient, row:pandas.core.series.Series,
6147
metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict,
6248
row_data_col:str, global_key_col:str="", external_id_col:str="", metadata_index:dict={}, divider:str="///"):
6349
""" Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
6450
Args:
65-
local_files : Required (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
6651
lb_client : Required (labelbox.client.Client) - Labelbox Client object
6752
base_client : Required (labelbase.client.Client) - Labelbase Client object
6853
row_data_col : Required (str) - Column containing asset URL or file path
@@ -75,7 +60,7 @@ def create_data_rows(local_files:bool, lb_client:Client, base_client:baseClient,
7560
Returns:
7661
A single dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys
7762
"""
78-
row_data = create_file(str(row[row_data_col])) if local_files else str(row[row_data_col])
63+
row_data = str(row[row_data_col])
7964
metadata_fields = [{"schema_id" : metadata_name_key_to_schema['lb_integration_source'], "value" : "Pandas"}]
8065
if metadata_index:
8166
for metadata_field_name in metadata_index.keys():
@@ -87,7 +72,7 @@ def create_data_rows(local_files:bool, lb_client:Client, base_client:baseClient,
8772
divider=divider
8873
)
8974
if metadata_value:
90-
metadata_fields.append({"schema_id" : metadata_name_key_to_schema[metadata_field_name], "value" : value})
75+
metadata_fields.append({"schema_id" : metadata_name_key_to_schema[metadata_field_name], "value" : metadata_value})
9176
else:
9277
continue
9378
return {"row_data":row_data,"global_key":str(row[global_key_col]),"external_id":str(row[external_id_col]),"metadata_fields":metadata_fields}

0 commit comments

Comments
 (0)