Update labels.py

raphaeljafriLB · web-flow · commit 518f40754e2b · 2023-02-04T11:37:51.000-05:00
diff --git a/labelpandas/labels.py b/labelpandas/labels.py
@@ -5,54 +5,70 @@
         [
             annotation_ndjson,
             annotation_ndjson,
-            annotation_ndjson,            
+            annotation_ndjson     
         ],
     project_id : 
         [
             annotation_ndjson,
             annotation_ndjson,
-            annotation_ndjson,            
-        ],              
+            annotation_ndjson
+        ]      
 }
 This is the format that labelbase.uploader.batch_upload_annotations() expects
 """
 import pandas
+import labelbase
 from labelbox import Client as labelboxClient
+from tqdm.autonotebook import tqdm
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
-def create_annotation_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame, row_data_col:str, global_key_col:str,
-                                  project_id_col:str, annotation_index:dict, divider:str="///", verbose:bool=False):
-    if not annotation_index:
-        project_id_to_upload_dict = {}        
-        errors = f"No annotation index provided - no annotations uploaded"
-    else:
-        try:
-            project_id_to_upload_dict = {project_id : [] for project_id in get_unique_values_function(table, project_id_col)}
-            for project_id in project_id_to_upload_dict:
-                project_id_to_upload_dict[project_id] = []
-                project_id_to_ontology_index[project_id] = get_ontology_schema_to_name_path(
-                    ontology=client.get_project(project_id).ontology(), divider=divider, invert=True
+def create_annotation_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame, table_dict:dict,
+                                  row_data_col:str, global_key_col:str, project_id_col:str, 
+                                  project_id:str, annotation_index:dict, global_key_to_data_row_id:dict,
+                                  divider:str="///", verbose:bool=False):
+    """
+    Args:
+        client                      :   Required (labelbox.client.Client) - Labelbox Client object        
+        table                       :   Required (pandas.core.frame.DataFrame) - Pandas DataFrame                
+        table_dict                  :   Required (dict) - Pandas DataFrame as dict with df.to_dict("records")    
+        row_data_col                :   Required (str) - Column containing asset URL or raw text
+        global_key_col              :   Required (str) - Column name containing the data row global key - defaults to row data
+        project_id_col              :   Required (str) - Column name containing the project ID to batch a given row to
+        project_id                  :   Required (str) - Labelbox project ID to add data rows to - only necessary if no "project_id" column exists
+        annotation_index            :   Required (dict) - Dictionary where {key=column_name : value=annotation_type}
+        global_key_to_data_row_id   :   Required (dict) - Dictionary where {key=global_key : value=data_row_id}
+    Returns:
+        Dictionary where {key=project_id : value=list of annotation ndjsons for that project}
+    """
+    project_id_to_upload_dict = {project_id : [] for project_id in get_unique_values_function(table, project_id_col)}
+    project_id_to_ontology = {}
+    for project_id in project_id_to_upload_dict:
+        ontology = client.get_project(project_id).ontology()
+        project_id_to_ontology[project_id] = {
+            "ontology_index" : labelbase.ontology.get_ontology_schema_to_name_path(ontology, divider=divider, invert=True, detailed=True),
+            "schema_to_name_path" : labelbase.ontology.get_ontology_schema_to_name_path(ontology, divider=divider, invert=False, detailed=False)
+        )
+    if verbose:
+        for row_dict in tqdm(table_dict):
+            for column_name in annotation_index.keys():
+                ndjsons = create_ndjsons(
+                    data_row_id = global_key_to_data_row_id[global_key_col],
+                    top_level_name=annotation_index[column_name],
+                    annotation_values=row_dict[column_name],
+                    ontology_index=project_id_to_ontology_index[row[project_id_col]],
+                    divider=divider
                 )
-            if verbose:
-                for index, row in tqdm(table.iterrows()):
-                    for column_name in annotation_index.keys():
-                        ndjsons = create_ndjsons(
-                            annotation_values=row[column_name], 
-                            annotation_type=annotation_index[column_name],
-                            ontology_index=project_id_to_ontology_index[row[project_id_col]],
-                            divide=divider
-                        )
-                        for ndjson in ndjsons:
-                            project_id_to_upload_dict[row[project_id_col]].append(ndjson)    
-                for index, row in table.iterrows():
-                    for column_name in annotation_index.keys():
-                        ndjsons = create_ndjsons(
-                            annotation_values=row[column_name], 
-                            annotation_type=annotation_index[column_name],
-                            ontology_index=project_id_to_ontology_index[row[project_id_col]],
-                            divide=divider
-                        )
-                        for ndjson in ndjsons:
-                            project_id_to_upload_dict[row[project_id_col]].append(ndjson)                              
-        except Exception as e:
-            errors = e
-    return project_id_to_upload_dict, errors
+                for ndjson in ndjsons:
+                    project_id_to_upload_dict[row[project_id_col]].append(ndjson)    
+        for row_dict in table_dict:
+            for column_name in annotation_index.keys():
+                ndjsons = create_ndjsons(
+                    data_row_id = global_key_to_data_row_id[global_key_col],
+                    top_level_name=annotation_index[column_name],
+                    annotation_values=row_dict[column_name],
+                    ontology_index=project_id_to_ontology_index[row[project_id_col]],
+                    divider=divider
+                )
+                for ndjson in ndjsons:
+                    project_id_to_upload_dict[row[project_id_col]].append(ndjson)                              
+    return project_id_to_upload_dict