Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit fc6a1a3

Browse files
Update connector.py
1 parent cbeac3e commit fc6a1a3

File tree

1 file changed

+54
-13
lines changed

1 file changed

+54
-13
lines changed

labelpandas/connector.py

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from labelbase.metadata import get_metadata_schema_to_name_key, process_metadata_value
2+
from labelbase.ontology import get_ontology_schema_to_name_path
23
from labelbox import labelboxClient
34
import pandas
45
from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -16,21 +17,61 @@ def create_batches(table=pandas.core.frame.DataFrame, global_key_col:str, projec
1617
Dictionary where {key=project_id : value=list_of_data_row_ids}
1718
"""
1819
project_id_to_batch_dict = {}
20+
errors = []
1921
if not project_id_col:
20-
raise ValueError(f"No project_id_col provided - please provide a column indicating what project to batch data rows to")
21-
column_names = get_columns_function(table)
22-
if project_id_col not in column_names:
23-
raise ValueError(f"Provided value for project_id_col `{project_id_col}` not in provided table column names")
24-
for index, row in table.iterrows():
25-
project_id = row[project_id_col]
26-
data_row_id = global_key_to_data_row_id[row[global_key_col]]
27-
if project_id not in project_id_to_batch_dict.keys():
28-
project_id_to_batch_dict[project_id] = []
29-
project_id_to_batch_dict[project_id].append(data_row_id)
30-
return project_id_to_batch_dict
22+
errors = f"No project_id_col provided - please provide a column indicating what project to batch data rows to"
23+
else:
24+
try:
25+
column_names = get_columns_function(table)
26+
if project_id_col not in column_names:
27+
raise ValueError(f"Provided value for project_id_col `{project_id_col}` not in provided table column names")
28+
for index, row in table.iterrows():
29+
project_id = row[project_id_col]
30+
data_row_id = global_key_to_data_row_id[row[global_key_col]]
31+
if project_id not in project_id_to_batch_dict.keys():
32+
project_id_to_batch_dict[project_id] = []
33+
project_id_to_batch_dict[project_id].append(data_row_id)
34+
except Exception as e:
35+
errors = e
36+
return project_id_to_batch_dict, errors
3137

32-
def create_annotation_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame, ):
33-
return global_key_to_upload_dict, errors
38+
def create_annotation_upload_dict(client:"labelboxClient", table:pandas.core.frame.DataFrame, row_data_col:str, global_key_col:str,
                                  project_id_col:str, annotation_index:dict, divider:str="///", verbose:bool=False):
    """ Builds, per project, the list of annotation ndjsons to upload from the rows of a table
    Args:
        client              :   Required - Client object; only `client.get_project(project_id).ontology()` is used here
        table               :   Required - Table that is iterated row by row with `table.iterrows()`
        row_data_col        :   Accepted for interface compatibility - not read by this function
        global_key_col      :   Accepted for interface compatibility - not read by this function
        project_id_col      :   Required - Column whose value selects the project each row's annotations belong to
        annotation_index    :   Required - Dictionary where {key=column_name : value=annotation_type}
        divider             :   Optional - String forwarded to the ontology index builder and to `create_ndjsons`
        verbose             :   Optional - If True, wraps the row iteration in a tqdm progress bar
    Returns:
        Tuple of (project_id_to_upload_dict, errors):
        - project_id_to_upload_dict : Dictionary where {key=project_id : value=list_of_ndjsons}
        - errors : an empty list on success, a message string if no annotation index was given,
                   or the caught exception if anything failed while building the uploads
    """
    # Bind both return values up front so every exit path returns a well-defined pair
    project_id_to_upload_dict = {}
    errors = []
    if not annotation_index:
        errors = "No annotation index provided - no annotations uploaded"
        return project_id_to_upload_dict, errors
    try:
        # One ontology lookup per project, created alongside that project's empty upload list
        project_id_to_ontology_index = {}
        for project_id in get_unique_values_function(table, project_id_col):
            project_id_to_upload_dict[project_id] = []
            project_id_to_ontology_index[project_id] = get_ontology_schema_to_name_path(
                ontology=client.get_project(project_id).ontology(), divider=divider, invert=True
            )
        # Iterate the rows exactly once; verbose only decides whether a progress bar wraps the iterator
        rows = tqdm(table.iterrows()) if verbose else table.iterrows()
        for index, row in rows:
            project_id = row[project_id_col]
            for column_name, annotation_type in annotation_index.items():
                # NOTE(review): keyword is `divide`, not `divider` - confirm against the create_ndjsons signature
                ndjsons = create_ndjsons(
                    annotation_values=row[column_name],
                    annotation_type=annotation_type,
                    ontology_index=project_id_to_ontology_index[project_id],
                    divide=divider
                )
                project_id_to_upload_dict[project_id].extend(ndjsons)
    except Exception as e:
        # Failures are reported through the return value rather than raised;
        # uploads collected before the failure are still returned
        errors = e
    return project_id_to_upload_dict, errors
3475

3576
def create_data_row_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame, row_data_col:str,
3677
global_key_col:str="", external_id_col:str="", metadata_index:dict={}, attachment_index:dict=attachment_index

0 commit comments

Comments
 (0)