Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 3f7b6ed

Browse files
Update connector.py
1 parent 7226875 commit 3f7b6ed

File tree

1 file changed

+71
-41
lines changed

1 file changed

+71
-41
lines changed

labelpandas/connector.py

Lines changed: 71 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,52 @@
1-
def create_data_rows(
2-
local_files,
3-
lb_client,
4-
row,
5-
row_data_col,
6-
global_key_col,
7-
external_id_col,
8-
metadata_index,
9-
metadata_name_key_to_schema,
10-
metadata_schema_to_name_key,
11-
divider):
1+
from labelbox import Client
2+
import pandas
3+
4+
def create_upload_dict(df:pandas.core.frame.DataFrame, local_files:bool, lb_client:Client, row:pandas.core.series.Series,
5+
row_data_col:str, global_key_col=None, external_id_col=None, metadata_index:dict={}, divider:str="///"):
6+
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
7+
Args:
8+
df : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
9+
local_files : Required (bool) - If True, will create urls for local files / If False, treats the values in `row_data_col` as urls
10+
lb_client : Required (labelbox.client.Client) - Labelbox Client object
11+
row : Required (pandas.core.series.Series) - Pandas row object
12+
row_data_col : Required (str) - Column name where the data row row data URL is located
13+
global_key_col : Optional (str) - Column name where the data row global key is located - defaults to the row_data_col
14+
external_id_col : Optional (str) - Column name where the data row external ID is located - defaults to the global_key_col
15+
metadata_index : Optional (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type must be one of "enum", "string", "datetime" or "number"
16+
divider : Optional (str) - String delimiter to separate metadata field names from their metadata answer options in your metadata_name_key_to_schema dictionary
17+
Returns:
18+
Dictionary where {key=global_key : value=dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys}
19+
"""
20+
global_key_col = global_key_col if global_key_col else row_data_col
21+
external_id_col = external_id_col if external_id_col else global_key_col
22+
metadata_schema_to_name_key = get_metadata_schema_to_name_key(lb_mdo=False, divider=divider, invert=False)
23+
metadata_name_key_to_schema = get_metadata_schema_to_name_key(lb_mdo=False, divider=divider, invert=True)
24+
global_key_to_upload_dict = {}
25+
futures = []
26+
with ThreadPoolExecutor() as exc:
27+
for index, row in df.iterrows():
28+
futures.append(
29+
exc.submit(
30+
create_data_rows, local_files, lb_client, row, row_data_col,
31+
global_key_col, external_id_col, metadata_index, metadata_name_key_to_schema,
32+
metadata_schema_to_name_key, divider
33+
)
34+
)
35+
for f in as_completed(futures):
36+
res = f.result()
37+
global_key_to_upload_dict[str(res["global_key"])] = res
38+
return global_key_to_upload_dict
39+
40+
def create_data_rows(local_files:bool, lb_client:Client, row:pandas.core.series.Series, row_data_col:str, global_key_col=None, external_id_col=None,
41+
metadata_index:dict={}, metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict, divider:str="///"):
1242
""" Function to-be-multithreaded to create data row dictionaries from a Pandas table
1343
Args:
1444
local_files : Required (bool) - If True, will create urls for local files / If False, treats the values in `row_data_col` as urls
1545
lb_client : Required (labelbox.client.Client) - Labelbox Client object
1646
row : Required (pandas.core.series.Series) - Pandas row object
1747
row_data_col : Required (str) - Column name where the data row row data URL is located
18-
global_key_col : Required (str) - Column name where the data row global key is located - defaults to the row_data column
19-
external_id_col : Required (str) - Column name where the data row external ID is located - defaults to the row_data column
48+
global_key_col : Optional (str) - Column name where the data row global key is located - defaults to the row_data_col
49+
external_id_col : Optional (str) - Column name where the data row external ID is located - defaults to the global_key_col
2050
metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type must be one of "enum", "string", "datetime" or "number"
2151
metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
2252
metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
@@ -36,33 +66,33 @@ def create_data_rows(
3666
data_row_dict['metadata_fields'].append({"schema_id" : metadata_schema_to_name_key[metadata_field_name], "value" : value})
3767
return data_row_dict
3868

39-
def get_columns_function(table):
40-
return [col for col in table.columns]
41-
42-
def get_unique_values_function(table, column_name:str):
43-
return list(table[column_name].unique())
69+
def get_columns_function(df):
70+
"""Grabs all column names from a Pandas DataFrame
71+
Args:
72+
df : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
73+
Returns:
74+
List of strings corresponding to all column names
75+
"""
76+
return [col for col in df.columns]
4477

45-
def add_column_function(table, column_name:str):
46-
table[column_name] = ""
47-
return table
78+
def get_unique_values_function(df, column_name:str):
79+
"""Grabs all unique values from a column in a Pandas DataFrame
80+
Args:
81+
df : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
82+
column_name : Required (str) - Column name
83+
Returns:
84+
List of all unique values in a column
85+
"""
86+
return list(df[column_name].unique())
4887

49-
def create_upload_dict(table, local_files, lb_client, row, row_data_col, global_key_col, external_id_col, metadata_index, divider):
50-
global_key_col = global_key_col if global_key_col else row_data_col
51-
external_id_col = external_id_col if external_id_col else global_key_col
52-
metadata_schema_to_name_key = get_metadata_schema_to_name_key(lb_mdo=False, divider=divider, invert=False)
53-
metadata_name_key_to_schema = get_metadata_schema_to_name_key(lb_mdo=False, divider=divider, invert=True)
54-
global_key_to_upload_dict = {}
55-
futures = []
56-
with ThreadPoolExecutor() as exc:
57-
for index, row in table.iterrows():
58-
futures.append(
59-
exc.submit(
60-
connector.create_data_rows, local_files, self.lb_client, row, row_data_col,
61-
global_key_col, external_id_col, metadata_index, metadata_name_key_to_schema,
62-
metadata_schema_to_name_key, divider
63-
)
64-
)
65-
for f in as_completed(futures):
66-
res = f.result()
67-
global_key_to_upload_dict[str(res["global_key"])] = res
68-
return global_key_to_upload_dict
88+
def add_column_function(df, column_name:str, default_value=""):
89+
""" Adds a column filled with a default value (an empty string unless specified) to an existing Pandas DataFrame
90+
Args:
91+
df : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
92+
column_name : Required (str) - Column name
93+
default_value : Optional - Value to insert into column
94+
Returns:
95+
Your Pandas DataFrame with a new column
96+
"""
97+
df[column_name] = default_value
98+
return df

0 commit comments

Comments
 (0)