Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 5f8679f

Browse files
Update connector.py
1 parent dcdf205 commit 5f8679f

File tree

1 file changed

+18
-25
lines changed

1 file changed

+18
-25
lines changed

labelpandas/connector.py

Lines changed: 18 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
from labelbase import Client as baseClient
33
import pandas
44
from concurrent.futures import ThreadPoolExecutor, as_completed
5-
import numpy
65

76
def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_client:Client, base_client:baseClient, row_data_col:str,
87
global_key_col:str="", external_id_col:str="", metadata_index:dict={}, divider:str="///", verbose=False):
@@ -33,26 +32,27 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_clie
3332
for index, row in df.iterrows():
3433
futures.append(
3534
exc.submit(
36-
create_data_rows, local_files, lb_client, row,
35+
create_data_rows, local_files, lb_client, base_client, row,
3736
metadata_name_key_to_schema, metadata_schema_to_name_key,
3837
row_data_col, global_key_col, external_id_col, metadata_index, divider
3938
)
4039
)
4140
for f in as_completed(futures):
4241
res = f.result()
42+
print(res)
4343
global_key_to_upload_dict[str(res["global_key"])] = res
4444
if verbose:
4545
print(f'Generated upload list - {len(global_key_to_upload_dict)} data rows to upload')
4646
return global_key_to_upload_dict
4747

48-
def create_data_rows(local_files:bool, lb_client:Client, row:pandas.core.series.Series,
48+
def create_data_rows(local_files:bool, lb_client:Client, base_client:baseClient, row:pandas.core.series.Series,
4949
metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict,
5050
row_data_col:str, global_key_col:str="", external_id_col:str="", metadata_index:dict={}, divider:str="///"):
5151
""" Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
5252
Args:
5353
local_files : Required (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
5454
lb_client : Required (labelbox.client.Client) - Labelbox Client object
55-
row : Required (pandas.core.series.Series) - Pandas Row object
55+
base_client : Required (labelbase.client.Client) - Labelbase Client object
5656
row_data_col : Required (str) - Column containing asset URL or file path
5757
global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
5858
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
@@ -64,29 +64,22 @@ def create_data_rows(local_files:bool, lb_client:Client, row:pandas.core.series.
6464
Two items - the global_key, and a dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys
6565
"""
6666
row_data = lb_client.upload_file(str(row[row_data_col])) if local_files else str(row[row_data_col])
67-
data_row_dict = {
68-
"row_data" : row_data, "global_key" : str(row[global_key_col]), "external_id" : row[external_id_col],
69-
"metadata_fields" : [{"schema_id" : metadata_name_key_to_schema['lb_integration_source'], "value" : "Pandas"}]
70-
}
67+
metadata_fields = [{"schema_id" : metadata_name_key_to_schema['lb_integration_source'], "value" : "Pandas"}]
7168
if metadata_index:
7269
for metadata_field_name in metadata_index.keys():
73-
row_value = row[metadata_field_name]
74-
metadata_type = metadata_index[metadata_field_name]
75-
if row_value:
76-
if str(row_value) == "nan":
77-
continue
78-
elif metadata_type == "enum":
79-
name_key = f"{metadata_field_name}{divider}{row[metadata_field_name]}"
80-
value = metadata_name_key_to_schema[name_key]
81-
elif metadata_type == "number":
82-
value = int(row_value)
83-
elif metadata_type == "string":
84-
value = str(row_value)
85-
else: ## Update for datetime later
86-
value = row_value
87-
data_row_dict['metadata_fields'].append({"schema_id" : metadata_name_key_to_schema[metadata_field_name], "value" : value})
88-
return data_row_dict
89-
70+
metadata_value = base_client.process_metadata_value(
71+
metadata_value=row[metadata_field_name],
72+
metadata_type=metadata_index[metadata_field_name],
73+
parent_name=metadata_field_name,
74+
metadata_name_key_to_schema=metadata_name_key_to_schema,
75+
divider=divider
76+
)
77+
if metadata_value:
78+
metadata_fields.append({"schema_id" : metadata_name_key_to_schema[metadata_field_name], "value" : value})
79+
else:
80+
continue
81+
return {"row_data":row_data,"global_key":str(row[global_key_col]),"external_id":str(row[external_id_col]),"metadata_fields":metadata_fields}
82+
9083
def get_columns_function(df):
9184
"""Grabs all column names from a Pandas DataFrame
9285
Args:

0 commit comments

Comments
 (0)