Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 518f407

Browse files
Update labels.py
1 parent 57d9c1b commit 518f407

File tree

1 file changed

+55
-39
lines changed

1 file changed

+55
-39
lines changed

labelpandas/labels.py

Lines changed: 55 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -5,54 +5,70 @@
55
[
66
annotation_ndjson,
77
annotation_ndjson,
8-
annotation_ndjson,
8+
annotation_ndjson
99
],
1010
project_id :
1111
[
1212
annotation_ndjson,
1313
annotation_ndjson,
14-
annotation_ndjson,
15-
],
14+
annotation_ndjson
15+
]
1616
}
1717
This is the format that labelbase.uploader.batch_upload_annotations() expects
1818
"""
1919
import pandas
20+
import labelbase
2021
from labelbox import Client as labelboxClient
22+
from tqdm.autonotebook import tqdm
23+
from concurrent.futures import ThreadPoolExecutor, as_completed
2124

22-
def create_annotation_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame, row_data_col:str, global_key_col:str,
23-
project_id_col:str, annotation_index:dict, divider:str="///", verbose:bool=False):
24-
if not annotation_index:
25-
project_id_to_upload_dict = {}
26-
errors = f"No annotation index provided - no annotations uploaded"
27-
else:
28-
try:
29-
project_id_to_upload_dict = {project_id : [] for project_id in get_unique_values_function(table, project_id_col)}
30-
for project_id in project_id_to_upload_dict:
31-
project_id_to_upload_dict[project_id] = []
32-
project_id_to_ontology_index[project_id] = get_ontology_schema_to_name_path(
33-
ontology=client.get_project(project_id).ontology(), divider=divider, invert=True
25+
def create_annotation_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame, table_dict:dict,
26+
row_data_col:str, global_key_col:str, project_id_col:str,
27+
project_id:str, annotation_index:dict, global_key_to_data_row_id:dict,
28+
divider:str="///", verbose:bool=False):
29+
"""
30+
Args:
31+
client : Required (labelbox.client.Client) - Labelbox Client object
32+
table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
33+
table_dict : Required (dict) - Pandas DataFrame as dict with df.to_dict("records")
34+
row_data_col : Required (str) - Column containing asset URL or raw text
35+
global_key_col : Required (str) - Column name containing the data row global key - defaults to row data
36+
project_id_col : Required (str) - Column name containing the project ID to batch a given row to
37+
project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no "project_id" column exists
38+
annotation_index : Required (dict) - Dictionary where {key=column_name : value=annotation_type}
39+
global_key_to_data_row_id : Required (dict) - Dictionary where {key=global_key : value=data_row_id}
40+
Returns:
41+
42+
"""
43+
project_id_to_upload_dict = {project_id : [] for project_id in get_unique_values_function(table, project_id_col)}
44+
project_id_to_ontology = {}
45+
for project_id in project_id_to_upload_dict:
46+
ontology = client.get_project(project_id).ontology()
47+
project_id_to_ontology[project_id] = {
48+
"ontology_index" : labelbase.ontology.get_ontology_schema_to_name_path(ontology, divider=divider, invert=True, detailed=True),
49+
"schema_to_name_path" : labelbase.ontology.get_ontology_schema_to_name_path(ontology, divider=divider, invert=False, detailed=False)
50+
)
51+
if verbose:
52+
for row_dict in tqdm(table_dict):
53+
for column_name in annotation_index.keys():
54+
ndjsons = create_ndjsons(
55+
data_row_id = global_key_to_data_row_id[global_key_col],
56+
top_level_name=annotation_index[column_name],
57+
annotation_values=row_dict[column_name],
58+
ontology_index=project_id_to_ontology_index[row[project_id_col]],
59+
divider=divider
3460
)
35-
if verbose:
36-
for index, row in tqdm(table.iterrows()):
37-
for column_name in annotation_index.keys():
38-
ndjsons = create_ndjsons(
39-
annotation_values=row[column_name],
40-
annotation_type=annotation_index[column_name],
41-
ontology_index=project_id_to_ontology_index[row[project_id_col]],
42-
divide=divider
43-
)
44-
for ndjson in ndjsons:
45-
project_id_to_upload_dict[row[project_id_col]].append(ndjson)
46-
for index, row in table.iterrows():
47-
for column_name in annotation_index.keys():
48-
ndjsons = create_ndjsons(
49-
annotation_values=row[column_name],
50-
annotation_type=annotation_index[column_name],
51-
ontology_index=project_id_to_ontology_index[row[project_id_col]],
52-
divide=divider
53-
)
54-
for ndjson in ndjsons:
55-
project_id_to_upload_dict[row[project_id_col]].append(ndjson)
56-
except Exception as e:
57-
errors = e
58-
return project_id_to_upload_dict, errors
61+
for ndjson in ndjsons:
62+
project_id_to_upload_dict[row[project_id_col]].append(ndjson)
63+
for row_dict in table_dict:
64+
for column_name in annotation_index.keys():
65+
ndjsons = create_ndjsons(
66+
data_row_id = global_key_to_data_row_id[global_key_col],
67+
top_level_name=annotation_index[column_name],
68+
annotation_values=row_dict[column_name],
69+
ontology_index=project_id_to_ontology_index[row[project_id_col]],
70+
divider=divider
71+
)
72+
for ndjson in ndjsons:
73+
project_id_to_upload_dict[row[project_id_col]].append(ndjson)
74+
return project_id_to_upload_dict

0 commit comments

Comments
 (0)