Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit d1f1d30

Browse files
Update connector.py
1 parent 0ed9e96 commit d1f1d30

File tree

1 file changed

+9
-11
lines changed

1 file changed

+9
-11
lines changed

labelpandas/connector.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
1-
from labelbase import Client as baseClient
1+
from labelbase.metadata import get_metadata_schema_to_name_key, process_metadata_value
22
from labelbox import Client
33
import pandas
44
from concurrent.futures import ThreadPoolExecutor, as_completed
55
from tqdm.autonotebook import tqdm
66
import math
77

8-
def create_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Client, base_client:baseClient, row_data_col:str,
8+
def create_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Client, row_data_col:str,
99
global_key_col:str="", external_id_col:str="", metadata_index:dict={}, local_files:bool=False,
1010
divider:str="///", verbose=False):
1111
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
1212
Args:
1313
table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
1414
lb_client : Required (labelbox.client.Client) - Labelbox Client object
15-
base_client : Required (labelbase.client.Client) - Labelbase Client object
1615
row_data_col : Required (str) - Column containing asset URL or file path
1716
global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
1817
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
@@ -34,8 +33,8 @@ def create_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Client, base
3433
print(f"Warning: Your global key column is not unique - upload will resume, only uploading 1 data row for duplicate global keys")
3534
global_key_col = global_key_col if global_key_col else row_data_col
3635
external_id_col = external_id_col if external_id_col else global_key_col
37-
metadata_schema_to_name_key = base_client.get_metadata_schema_to_name_key(lb_mdo=False, divider=divider, invert=False)
38-
metadata_name_key_to_schema = base_client.get_metadata_schema_to_name_key(lb_mdo=False, divider=divider, invert=True)
36+
metadata_schema_to_name_key = get_metadata_schema_to_name_key(client=lb_client, lb_mdo=False, divider=divider, invert=False)
37+
metadata_name_key_to_schema = get_metadata_schema_to_name_key(client=lb_client, lb_mdo=False, divider=divider, invert=True)
3938
with ThreadPoolExecutor(max_workers=8) as exc:
4039
global_key_to_upload_dict = {}
4140
errors = []
@@ -44,13 +43,13 @@ def create_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Client, base
4443
print(f'Submitting data rows...')
4544
for index, row in tqdm(table.iterrows()):
4645
futures.append(exc.submit(
47-
create_data_rows, lb_client, base_client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
46+
create_data_rows, lb_client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
4847
row_data_col, global_key_col, external_id_col, metadata_index, local_files, divider
4948
))
5049
else:
5150
for index, row in table.iterrows():
5251
futures.append(exc.submit(
53-
create_data_rows, lb_client, base_client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
52+
create_data_rows, lb_client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
5453
row_data_col, global_key_col, external_id_col, metadata_index, local_files, divider
5554
))
5655
if verbose:
@@ -72,13 +71,12 @@ def create_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Client, base
7271
print(f'Generated upload list - {len(global_key_to_upload_dict)} data rows to upload')
7372
return global_key_to_upload_dict, errors
7473

75-
def create_data_rows(lb_client:Client, base_client:baseClient, row:pandas.core.series.Series,
74+
def create_data_rows(lb_client:Client, row:pandas.core.series.Series,
7675
metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict, row_data_col:str,
7776
global_key_col:str, external_id_col:str, metadata_index:dict, local_files:bool, divider:str):
7877
""" Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
7978
Args:
8079
lb_client : Required (labelbox.client.Client) - Labelbox Client object
81-
base_client : Required (labelbase.client.Client) - Labelbase Client object
8280
row : Required (pandas.core.series.Series) - Pandas Series object, corresponds to one row in a df.iterrow()
8381
metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
8482
metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
@@ -104,8 +102,8 @@ def create_data_rows(lb_client:Client, base_client:baseClient, row:pandas.core.s
104102
metadata_fields = [{"schema_id" : metadata_name_key_to_schema['lb_integration_source'], "value" : "Pandas"}]
105103
if metadata_index:
106104
for metadata_field_name in metadata_index.keys():
107-
input_metadata = base_client.process_metadata_value(
108-
metadata_value=row[metadata_field_name], metadata_type=metadata_index[metadata_field_name],
105+
input_metadata = process_metadata_value(
106+
client=lb_client, metadata_value=row[metadata_field_name], metadata_type=metadata_index[metadata_field_name],
109107
parent_name=metadata_field_name, metadata_name_key_to_schema=metadata_name_key_to_schema, divider=divider
110108
)
111109
if input_metadata:

0 commit comments

Comments (0)