Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 12fccaa

Browse files
Update data_rows.py
1 parent 7fc027b commit 12fccaa

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

labelpandas/data_rows.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,19 @@
1-
import pandas as pd
1+
import pandas
22
from labelbox import Client as labelboxClient
33
import labelbase
44
from labelpandas import connector
55
from tqdm import tqdm
66
from concurrent.futures import ThreadPoolExecutor, as_completed
77

8-
def create_data_row_upload_dict(client:labelboxClient, table:dict,
8+
def create_data_row_upload_dict(client:labelboxClient, table: pandas.core.frame.DataFrame,table_dict:dict,
99
row_data_col:str, global_key_col:str, external_id_col:str, dataset_id_col:str,
1010
dataset_id:str, metadata_index:dict, attachment_index:dict,
1111
divider:str, verbose:bool, extra_client:bool=None):
1212
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
1313
Args:
1414
client : Required (labelbox.client.Client) - Labelbox Client object
15-
table : Required (dict) - Pandas DataFrame as dict with df.to_dict("records")
15+
table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
16+
table_dict : Required (dict) - Pandas DataFrame as dict with df.to_dict("records")
1617
row_data_col : Required (str) - Column containing asset URL or raw text
1718
global_key_col : Required (str) - Column name containing the data row global key - defaults to row data
1819
external_id_col : Required (str) - Column name containing the data row external ID - defaults to global key
@@ -28,18 +29,18 @@ def create_data_row_upload_dict(client:labelboxClient, table:dict,
2829
- global_key_to_upload_dict - Dictionary where {key=global_key : value=data row dictionary in upload format}
2930
- errors - List of dictionaries containing conversion error information; see connector.create_data_rows() for more information
3031
"""
31-
table_length = len(df_dict)
32+
table_length = connector.get_table_length_function(table=table)
3233
if verbose:
3334
print(f'Creating upload list - {table_length} rows in Pandas DataFrame')
34-
unique_global_key_count = len(list(set([str(row_dict[global_key_col]) for row_dict in df_dict])))
35+
unique_global_key_count = len(connector.get_unique_values_function(table=table, column_name=global_key_col))
3536
if table_length != unique_global_key_count:
3637
print(f"Warning: Your global key column is not unique - upload will resume, only uploading 1 data row per unique global key")
3738
metadata_schema_to_name_key = labelbase.metadata.get_metadata_schema_to_name_key(client=client, lb_mdo=False, divider=divider, invert=False)
3839
metadata_name_key_to_schema = labelbase.metadata.get_metadata_schema_to_name_key(client=client, lb_mdo=False, divider=divider, invert=True)
3940
if dataset_id:
4041
dataset_to_global_key_to_upload_dict = {dataset_id : {}}
4142
else:
42-
dataset_to_global_key_to_upload_dict = {id : {} for id in list(set([str(row_dict[dataset_id_col]))) for row_dict in df_dict])))}
43+
dataset_to_global_key_to_upload_dict = {id : {} for id in connector.get_unique_values_function(table=table, column_name=dataset_id_col)}
4344
with ThreadPoolExecutor(max_workers=8) as exc:
4445
errors = []
4546
futures = []

0 commit comments

Comments
 (0)