Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 7fc027b

Browse files
Create batches.py
1 parent 21cf3ea commit 7fc027b

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

labelpandas/batches.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import pandas as pd
2+
3+
def create_batches_dict(table: pd.DataFrame, table_dict: dict,
                        global_key_col: str, project_id_col: str,
                        project_id: str, global_key_to_data_row_id: dict) -> tuple:
    """ From a Pandas DataFrame, creates a dictionary where {key=project_id : value=list_of_data_row_ids}
    Args:
        table                       :   Required (pandas.DataFrame) - Pandas DataFrame
        table_dict                  :   Required (dict) - Pandas DataFrame as dict with df.to_dict("records")
        global_key_col              :   Required (str) - Column name containing the data row global key
        project_id_col              :   Required (str) - Column name containing the project ID to batch a given row to;
                                            only read when `project_id` is empty
        project_id                  :   Required (str) - Labelbox project ID to add data rows to; when non-empty, every
                                            row is batched to this project and `project_id_col` is ignored
        global_key_to_data_row_id   :   Required (dict) - Dictionary where {key=global_key : value=data_row_id}
    Returns:
        Tuple of (project_id_to_batch_dict, errors):
            project_id_to_batch_dict : Dictionary where {key=project_id : value=list_of_data_row_ids}
            errors                   : List that is empty on success, or holds the exception that stopped processing
    Raises:
        KeyError if `project_id` is empty and `project_id_col` is not a column of `table`
    """
    if project_id:
        project_id_to_batch_dict = {project_id: []}
    else:
        # Seed one empty batch per distinct project ID found in the table's project ID column.
        unique_project_ids = table[project_id_col].unique().tolist()
        project_id_to_batch_dict = {unique_id: [] for unique_id in unique_project_ids}
    errors = []
    try:
        for row in table_dict:
            # An explicit project_id overrides any per-row value.
            target_project_id = project_id if project_id else row[project_id_col]
            data_row_id = global_key_to_data_row_id[row[global_key_col]]
            project_id_to_batch_dict[target_project_id].append(data_row_id)
    except Exception as e:
        # Processing stops at the first failing row; rows handled before it stay in the
        # returned dictionary. The exception is appended so `errors` is always a list.
        errors.append(e)
    return project_id_to_batch_dict, errors

0 commit comments

Comments
 (0)