1
1
from labelbase .metadata import get_metadata_schema_to_name_key , process_metadata_value
2
+ from labelbase .ontology import get_ontology_schema_to_name_path
2
3
from labelbox import labelboxClient
3
4
import pandas
4
5
from concurrent .futures import ThreadPoolExecutor , as_completed
@@ -16,21 +17,61 @@ def create_batches(table=pandas.core.frame.DataFrame, global_key_col:str, projec
16
17
Dictionary where {key=project_id : value=list_of_data_row_ids}
17
18
"""
18
19
project_id_to_batch_dict = {}
20
+ errors = []
19
21
if not project_id_col :
20
- raise ValueError (f"No project_id_col provided - please provide a column indicating what project to batch data rows to" )
21
- column_names = get_columns_function (table )
22
- if project_id_col not in column_names :
23
- raise ValueError (f"Provided value for project_id_col `{ project_id_col } ` not in provided table column names" )
24
- for index , row in table .iterrows ():
25
- project_id = row [project_id_col ]
26
- data_row_id = global_key_to_data_row_id [row [global_key_col ]]
27
- if project_id not in project_id_to_batch_dict .keys ():
28
- project_id_to_batch_dict [project_id ] = []
29
- project_id_to_batch_dict [project_id ].append (data_row_id )
30
- return project_id_to_batch_dict
22
+ errors = f"No project_id_col provided - please provide a column indicating what project to batch data rows to"
23
+ else :
24
+ try :
25
+ column_names = get_columns_function (table )
26
+ if project_id_col not in column_names :
27
+ raise ValueError (f"Provided value for project_id_col `{ project_id_col } ` not in provided table column names" )
28
+ for index , row in table .iterrows ():
29
+ project_id = row [project_id_col ]
30
+ data_row_id = global_key_to_data_row_id [row [global_key_col ]]
31
+ if project_id not in project_id_to_batch_dict .keys ():
32
+ project_id_to_batch_dict [project_id ] = []
33
+ project_id_to_batch_dict [project_id ].append (data_row_id )
34
+ except Exception as e :
35
+ errors = e
36
+ return project_id_to_batch_dict , errors
31
37
32
def create_annotation_upload_dict(
    client: "labelboxClient",
    table: "pandas.core.frame.DataFrame",
    row_data_col: str,
    global_key_col: str,
    project_id_col: str,
    annotation_index: dict,
    divider: str = "///",
    verbose: bool = False,
):
    """Group the annotation payloads built from each table row by project ID.

    Args:
        client:             Labelbox client, used to fetch each project's ontology.
        table:              Table whose rows are iterated with `table.iterrows()`.
        row_data_col:       Not used by this function; kept for interface compatibility.
        global_key_col:     Not used by this function; kept for interface compatibility.
        project_id_col:     Column holding the project ID each row belongs to.
        annotation_index:   Dictionary where {key=column_name : value=annotation_type}.
        divider:            String forwarded to the ontology index and ndjson helpers.
        verbose:            If True, wraps the row iteration in a tqdm progress bar.
    Returns:
        Tuple of (project_id_to_upload_dict, errors):
        - project_id_to_upload_dict: Dictionary where {key=project_id : value=list_of_ndjsons}.
          May be empty or partially filled if an error interrupted processing.
        - errors: Empty list on success, a message string if no annotation index
          was provided, or the exception caught while building the dictionary.
    """
    # Bind both return values up front so every path reaches `return` with them defined.
    project_id_to_upload_dict = {}
    errors = []
    if not annotation_index:
        errors = f"No annotation index provided - no annotations uploaded"
        return project_id_to_upload_dict, errors
    try:
        project_id_to_upload_dict = {
            project_id: [] for project_id in get_unique_values_function(table, project_id_col)
        }
        # One ontology lookup per project, reused for every row of that project.
        project_id_to_ontology_index = {}
        for project_id in project_id_to_upload_dict:
            project_id_to_ontology_index[project_id] = get_ontology_schema_to_name_path(
                ontology=client.get_project(project_id).ontology(), divider=divider, invert=True
            )
        # Iterate the rows exactly once; `verbose` only decides whether progress is shown.
        rows = tqdm(table.iterrows()) if verbose else table.iterrows()
        for _, row in rows:
            project_id = row[project_id_col]
            for column_name, annotation_type in annotation_index.items():
                # NOTE(review): keyword `divide` is kept as written in the original call;
                # confirm it matches the parameter name in `create_ndjsons`.
                ndjsons = create_ndjsons(
                    annotation_values=row[column_name],
                    annotation_type=annotation_type,
                    ontology_index=project_id_to_ontology_index[project_id],
                    divide=divider
                )
                project_id_to_upload_dict[project_id].extend(ndjsons)
    except Exception as e:
        # Report the failure to the caller instead of raising, as the original did.
        errors = e
    return project_id_to_upload_dict, errors
34
75
35
76
def create_data_row_upload_dict (client :labelboxClient , table :pandas .core .frame .DataFrame , row_data_col :str ,
36
77
global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, attachment_index :dict = attachment_index
0 commit comments