from labelbox import Client as labelboxClient
from labelbox.schema.dataset import Dataset as labelboxDataset
- from labelpandas import connector
+ import labelpandas
import labelbase
import pandas as pd

@@ -27,15 +27,15 @@ def __init__(
        # return table

    def create_data_rows_from_table(
-        self, table: pd.core.frame.DataFrame, lb_dataset: labelboxDataset, project_id: str = "", priority=5,
+        self, table: pd.core.frame.DataFrame, dataset_id: str = "", project_id: str = "", priority: int = 5,
        upload_method: str = "", skip_duplicates: bool = False, verbose: bool = False, divider="///"):
        """ Creates Labelbox data rows given a Pandas table and a Labelbox Dataset
        Args:
            table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
-            lb_dataset : Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
-            project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no project_id column exists
+            dataset_id : Required (str) - Labelbox dataset ID to add data rows to - only necessary if no "dataset_id" column exists
+            project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no "project_id" column exists
            priority : Optional (int) - Between 1 and 5, what priority to give to data row batches sent to projects
-            upload_method : Optional (str) - Either "mal" or "import" - required if an annotation_index is provided
+            upload_method : Optional (str) - Either "mal" or "import" - required to upload annotations (otherwise leave as "")
            skip_duplicates : Optional (bool) - Determines how to handle if a global key to-be-uploaded is already in use
                If True, will skip duplicate global_keys and not upload them
                If False, will generate a unique global_key with a suffix {divider} + "1", "2" and so on
@@ -46,10 +46,13 @@ def create_data_rows_from_table(
        # row_data_col : column with name "row_data"
        # global_key_col : column with name "global_key" - defaults to row_data_col
        # external_id_col : column with name "external_id" - defaults to global_key_col
+        # project_id_col : column with name "project_id" - defaults to "" (requires project_id input argument if no "project_id" column exists)
+        # dataset_id_col : column with name "dataset_id" - defaults to "" (requires dataset_id input argument if no "dataset_id" column exists)
        # metadata_index : Dictionary where {key=column_name : value=metadata_type}
        # attachment_index : Dictionary where {key=column_name : value=attachment_type}
        # annotation_index : Dictionary where {key=column_name : value=annotation_type}
-        row_data_col, global_key_col, external_id_col, metadata_index, attachment_index, annotation_index = labelbase.connector.validate_columns(
+        row_data_col, global_key_col, external_id_col, project_id_col, dataset_id_col, metadata_index, attachment_index, annotation_index = labelbase.connector.validate_columns(
            table=table,
            get_columns_function=connector.get_columns_function,
            get_unique_values_function=connector.get_unique_values_function,
@@ -58,69 +61,75 @@ def create_data_rows_from_table(
            extra_client=None
        )

-        # Create a dictionary where {key=global_key : value=data_row_upload_dict} - this is unique to Pandas
-        global_key_to_upload_dict, data_row_conversion_errors = connector.create_data_row_upload_dict(
-            client=self.lb_client, table=table,
-            row_data_col=row_data_col, global_key_col=global_key_col, external_id_col=external_id_col,
-            metadata_index=metadata_index, attachment_index=attachment_index,
-            divider=divider, verbose=verbose
-        )
+        # Iterating over your pandas DataFrame is faster once converted to a list of dictionaries where {key=column_name : value=row_value}
+        table_dict = table.to_dict('records')

-        # If there are conversion errors, let the user know; if there are no successful conversions, terminate the upload
-        if data_row_conversion_errors:
-            print(f'There were {len(data_row_conversion_errors)} errors in creating your upload list - see result["data_row_conversion_errors"] for more information')
-            if global_key_to_upload_dict:
-                print(f'Data row upload will continue')
-            else:
-                print(f'Data row upload will not continue')
-                return {
-                    "data_row_upload_results" : [], "data_row_conversion_errors" : data_row_conversion_errors,
-                    "batch_to_project_errors" : [], "annotation_upload_results" : [], "annotation_conversion_errors" : []
-                }
+        if (dataset_id_col == "") and (dataset_id == ""):
+            raise ValueError("To create data rows, please provide either a `dataset_id` column or a Labelbox dataset id to argument `dataset_id`")
+
+        if (upload_method != "") and (project_id_col == "") and (project_id == "") and (annotation_index != {}):
+            raise ValueError("To upload annotations, please provide either a `project_id` column or a Labelbox project id to argument `project_id`")
+
+        # Create a dictionary where {key=dataset_id : value={key=global_key : value=data_row_upload_dict}} - this is unique to Pandas
+        dataset_to_global_key_to_upload_dict = labelpandas.data_rows.create_data_row_upload_dict(
+            client=self.lb_client, table=table, table_dict=table_dict,
+            row_data_col=row_data_col, global_key_col=global_key_col, external_id_col=external_id_col, dataset_id_col=dataset_id_col,
+            dataset_id=dataset_id, metadata_index=metadata_index, attachment_index=attachment_index,
+            divider=divider, verbose=verbose, extra_client=None
+        )
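+        # For illustration only, the nested structure looks like (placeholder ids):
+        # {
+        #     "datasetid-1" : {
+        #         "global-key-1" : { data row upload dict for "global-key-1" },
+        #         "global-key-2" : { data row upload dict for "global-key-2" }
+        #     }
+        # }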

        # Upload your data rows to Labelbox
        data_row_upload_results = labelbase.uploader.batch_create_data_rows(
-            client=self.lb_client, dataset=lb_dataset, global_key_to_upload_dict=global_key_to_upload_dict,
+            client=self.lb_client, dataset_to_global_key_to_upload_dict=dataset_to_global_key_to_upload_dict,
            skip_duplicates=skip_duplicates, divider=divider, verbose=verbose
        )

-        # Default global_key_col if row_data_col not provided
-        global_key_col = global_key_col if global_key_col else row_data_col
-
-        # Create a dictionary where {key=global_key : value=data_row_id}
-        global_key_to_data_row_id = uploader.create_global_key_to_data_row_dict(
-            client=self.lb_client, global_keys=connector.get_unique_values_function(table, global_key_col)
-        )
-
-        # Create a dictionary where {key=project_id : value=list_of_data_row_ids}, if applicable
-        project_id_to_batch_dict, batch_to_project_errors = connector.create_batches(
-            client=self.lb_client, table=table, global_key_col=global_key_col,
-            project_id_col=project_id_col, global_key_to_data_row_id=global_key_to_data_row_id
-        )
+        # If project ids are provided, we can batch data rows to projects
+        if project_id or project_id_col:
+
+            # Create a dictionary where {key=global_key : value=data_row_id}
+            global_key_to_data_row_id = labelbase.uploader.create_global_key_to_data_row_dict(
+                client=self.lb_client, global_keys=labelpandas.connector.get_unique_values_function(table, global_key_col)
+            )
+
+            # Create a dictionary where {key=project_id : value=list_of_data_row_ids}, if applicable
+            project_id_to_batch_dict = labelpandas.batches.create_batches_dict(
+                client=self.lb_client, table=table, table_dict=table_dict,
+                global_key_col=global_key_col, project_id_col=project_id_col,
+                global_key_to_data_row_id=global_key_to_data_row_id
+            )
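+            # For illustration only, the resulting structure looks like (placeholder ids):
+            # {
+            #     "projectid-1" : ["datarowid-1", "datarowid-2"],
+            #     "projectid-2" : ["datarowid-3"]
+            # }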

-        # Batch data rows to projects, if applicable
-        if not batch_to_project_errors:
-            batch_to_project_errors = uploader.batch_rows_to_project(
+            # Batch data rows to projects, if applicable
+            batch_to_project_results = labelbase.uploader.batch_rows_to_project(
                client=self.lb_client, project_id_to_batch_dict=project_id_to_batch_dict, priority=priority
            )

-        # Create a dictionary where {key=project_id : value=annotation_upload_list}, if applicable
-        project_id_to_upload_dict, annotation_conversion_errors = connector.create_annotation_upload_dict(
-            client=self.lb_client, table=table, row_data_col=row_data_col, global_key_col=global_key_col,
-            project_id_col=project_id_col, annotation_index=annotation_index, divider=divider, verbose=verbose
-        )
+            if (upload_method in ["mal", "import"]) and (annotation_index != {}):
+
+                # Create a dictionary where {key=project_id : value=annotation_upload_list}, if applicable
+                project_id_to_upload_dict = labelpandas.connector.create_annotation_upload_dict(
+                    client=self.lb_client, table=table, table_dict=table_dict,
+                    row_data_col=row_data_col, global_key_col=global_key_col, project_id_col=project_id_col,
+                    annotation_index=annotation_index, divider=divider, verbose=verbose
+                )
+
+                # Upload your annotations to Labelbox, if applicable
+                annotation_upload_results = labelbase.uploader.batch_upload_annotations(
+                    client=self.lb_client, project_id_to_upload_dict=project_id_to_upload_dict, how=upload_method, verbose=verbose
+                )
+
+            else:
+                annotation_upload_results = []
+

-        # Upload your annotations to Labelbox, if applicable
-        annotation_upload_results = uploader.batch_upload_annotations(
-            client=self.lb_client, project_id_to_upload_dict=project_id_to_upload_dict, how=upload_method, verbose=verbose
-        )
+        else:
+            batch_to_project_results = []
+            annotation_upload_results = []

        return {
            "data_row_upload_results" : data_row_upload_results,
-            "data_row_conversion_errors" : data_row_conversion_errors,
-            "batch_to_project_errors" : batch_to_project_errors,
-            "annotation_upload_results" : annotation_upload_results,
-            "annotation_conversion_errors" : annotation_conversion_errors
+            "batch_to_project_results" : batch_to_project_results,
+            "annotation_upload_results" : annotation_upload_results
        }

# def upsert_table_metadata():
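For reference, the returned dictionary can be inspected per step; a minimal sketch, assuming the hypothetical call shown earlier:

results = client.create_data_rows_from_table(table=df, dataset_id="YOUR_DATASET_ID")
print(results["data_row_upload_results"])    # per-data-row upload outcomes
print(results["batch_to_project_results"])   # [] unless a project id / "project_id" column was provided
print(results["annotation_upload_results"])  # [] unless upload_method and an annotation_index were provided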