from labelbox import Client as labelboxClient
from labelbox.schema.dataset import Dataset as labelboxDataset
- from labelpandas import connector
+ import labelpandas
import labelbase
import pandas as pd

@@ -27,15 +27,15 @@ def __init__(
        # return table

    def create_data_rows_from_table(
-        self, table: pd.core.frame.DataFrame, lb_dataset: labelboxDataset, project_id: str = "", priority=5,
+        self, table: pd.core.frame.DataFrame, dataset_id: str = "", project_id: str = "", priority: int = 5,
        upload_method: str = "", skip_duplicates: bool = False, verbose: bool = False, divider="///"):
        """ Creates Labelbox data rows given a Pandas table and a Labelbox Dataset
        Args:
            table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
-            lb_dataset : Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
-            project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no project_id column exists
+            dataset_id : Required (str) - Labelbox dataset ID to add data rows to - only necessary if no "dataset_id" column exists
+            project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no "project_id" column exists
            priority : Optional (int) - Between 1 and 5, what priority to give to data row batches sent to projects
-            upload_method : Optional (str) - Either "mal" or "import" - required if an annotation_index is provided
+            upload_method : Optional (str) - Either "mal" or "import" - required to upload annotations (otherwise leave as "")
            skip_duplicates : Optional (bool) - Determines how to handle if a global key to-be-uploaded is already in use
                If True, will skip duplicate global_keys and not upload them
                If False, will generate a unique global_key with a suffix {divider} + "1", "2" and so on
@@ -46,10 +46,13 @@ def create_data_rows_from_table(
        # row_data_col : column with name "row_data"
        # global_key_col : column with name "global_key" - defaults to row_data_col
        # external_id_col : column with name "external_id" - defaults to global_key_col
+        # project_id_col : column with name "project_id" - defaults to "" (requires project_id input argument if no "project_id" column exists)
+        # dataset_id_col : column with name "dataset_id" - defaults to "" (requires dataset_id input argument if no "dataset_id" column exists)
        # metadata_index : Dictionary where {key=column_name : value=metadata_type}
        # attachment_index : Dictionary where {key=column_name : value=attachment_type}
        # annotation_index : Dictionary where {key=column_name : value=annotation_type}
-        row_data_col, global_key_col, external_id_col, metadata_index, attachment_index, annotation_index = labelbase.connector.validate_columns(
+        row_data_col, global_key_col, external_id_col, project_id_col, dataset_id_col, metadata_index, attachment_index, annotation_index = labelbase.connector.validate_columns(
            table=table,
            get_columns_function=connector.get_columns_function,
            get_unique_values_function=connector.get_unique_values_function,
@@ -58,69 +61,75 @@ def create_data_rows_from_table(
            extra_client=None
        )

-        # Create a dictionary where {key=global_key : value=data_row_upload_dict} - this is unique to Pandas
-        global_key_to_upload_dict, data_row_conversion_errors = connector.create_data_row_upload_dict(
-            client=self.lb_client, table=table,
-            row_data_col=row_data_col, global_key_col=global_key_col, external_id_col=external_id_col,
-            metadata_index=metadata_index, attachment_index=attachment_index,
-            divider=divider, verbose=verbose
-        )
+        # Iterating over your pandas DataFrame is faster once converted to a list of dictionaries where {key=column_name : value=row_value}
+        table_dict = table.to_dict('records')

-        # If there are conversion errors, let the user know; if there are no successful conversions, terminate the upload
-        if data_row_conversion_errors:
-            print(f'There were {len(data_row_conversion_errors)} errors in creating your upload list - see result["data_row_conversion_errors"] for more information')
-            if global_key_to_upload_dict:
-                print(f'Data row upload will continue')
-            else:
-                print(f'Data row upload will not continue')
-                return {
-                    "data_row_upload_results" : [], "data_row_conversion_errors" : data_row_conversion_errors,
-                    "batch_to_project_errors" : [], "annotation_upload_results" : [], "annotation_conversion_errors" : []
-                }
+        if (dataset_id_col == "") and (dataset_id == ""):
+            raise ValueError("To create data rows, please provide either a `dataset_id` column or a Labelbox dataset id to argument `dataset_id`")
+
+        if (upload_method != "") and (project_id_col == "") and (project_id == "") and (annotation_index != {}):
+            raise ValueError("To upload annotations, please provide either a `project_id` column or a Labelbox project id to argument `project_id`")
+
+        # Create a dictionary where {key=dataset_id : value={key=global_key : value=data_row_upload_dict}} - this is unique to Pandas
+        dataset_to_global_key_to_upload_dict = labelpandas.data_rows.create_data_row_upload_dict(
+            client=self.lb_client, table=table, table_dict=table_dict,
+            row_data_col=row_data_col, global_key_col=global_key_col, external_id_col=external_id_col, dataset_id_col=dataset_id_col,
+            dataset_id=dataset_id, metadata_index=metadata_index, attachment_index=attachment_index,
+            divider=divider, verbose=verbose, extra_client=None
+        )
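+        # For illustration only, the nested structure looks like (placeholder ids):
+        # {
+        #     "datasetid-1" : {
+        #         "global-key-1" : { data row upload dict for "global-key-1" },
+        #         "global-key-2" : { data row upload dict for "global-key-2" }
+        #     }
+        # }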

        # Upload your data rows to Labelbox
        data_row_upload_results = labelbase.uploader.batch_create_data_rows(
-            client=self.lb_client, dataset=lb_dataset, global_key_to_upload_dict=global_key_to_upload_dict,
+            client=self.lb_client, dataset_to_global_key_to_upload_dict=dataset_to_global_key_to_upload_dict,
            skip_duplicates=skip_duplicates, divider=divider, verbose=verbose
        )

-        # Default global_key_col if row_data_col not provided
-        global_key_col = global_key_col if global_key_col else row_data_col
-
-        # Create a dictionary where {key=global_key : value=data_row_id}
-        global_key_to_data_row_id = uploader.create_global_key_to_data_row_dict(
-            client=self.lb_client, global_keys=connector.get_unique_values_function(table, global_key_col)
-        )
-
-        # Create a dictionary where {key=project_id : value=list_of_data_row_ids}, if applicable
-        project_id_to_batch_dict, batch_to_project_errors = connector.create_batches(
-            client=self.lb_client, table=table, global_key_col=global_key_col,
-            project_id_col=project_id_col, global_key_to_data_row_id=global_key_to_data_row_id
-        )
+        # If project ids are provided, we can batch data rows to projects
+        if project_id or project_id_col:
+
+            # Create a dictionary where {key=global_key : value=data_row_id}
+            global_key_to_data_row_id = labelbase.uploader.create_global_key_to_data_row_dict(
+                client=self.lb_client, global_keys=labelpandas.connector.get_unique_values_function(table, global_key_col)
+            )
+
+            # Create a dictionary where {key=project_id : value=list_of_data_row_ids}, if applicable
+            project_id_to_batch_dict = labelpandas.batches.create_batches_dict(
+                client=self.lb_client, table=table, table_dict=table_dict,
+                global_key_col=global_key_col, project_id_col=project_id_col,
+                global_key_to_data_row_id=global_key_to_data_row_id
+            )
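+            # For illustration only, the resulting structure looks like (placeholder ids):
+            # {
+            #     "projectid-1" : ["datarowid-1", "datarowid-2"],
+            #     "projectid-2" : ["datarowid-3"]
+            # }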

-        # Batch data rows to projects, if applicable
-        if not batch_to_project_errors:
-            batch_to_project_errors = uploader.batch_rows_to_project(
+            # Batch data rows to projects, if applicable
+            batch_to_project_results = labelbase.uploader.batch_rows_to_project(
                client=self.lb_client, project_id_to_batch_dict=project_id_to_batch_dict, priority=priority
            )

-        # Create a dictionary where {key=project_id : value=annotation_upload_list}, if applicable
-        project_id_to_upload_dict, annotation_conversion_errors = connector.create_annotation_upload_dict(
-            client=self.lb_client, table=table, row_data_col=row_data_col, global_key_col=global_key_col,
-            project_id_col=project_id_col, annotation_index=annotation_index, divider=divider, verbose=verbose
-        )
+            if (upload_method in ["mal", "import"]) and (annotation_index != {}):
+
+                # Create a dictionary where {key=project_id : value=annotation_upload_list}, if applicable
+                project_id_to_upload_dict = labelpandas.connector.create_annotation_upload_dict(
+                    client=self.lb_client, table=table, table_dict=table_dict,
+                    row_data_col=row_data_col, global_key_col=global_key_col, project_id_col=project_id_col,
+                    annotation_index=annotation_index, divider=divider, verbose=verbose
+                )
+
+                # Upload your annotations to Labelbox, if applicable
+                annotation_upload_results = labelbase.uploader.batch_upload_annotations(
+                    client=self.lb_client, project_id_to_upload_dict=project_id_to_upload_dict, how=upload_method, verbose=verbose
+                )
+
+            else:
+                annotation_upload_results = []
+

-        # Upload your annotations to Labelbox, if applicable
-        annotation_upload_results = uploader.batch_upload_annotations(
-            client=self.lb_client, project_id_to_upload_dict=project_id_to_upload_dict, how=upload_method, verbose=verbose
-        )
+        else:
+            batch_to_project_results = []
+            annotation_upload_results = []

        return {
            "data_row_upload_results" : data_row_upload_results,
-            "data_row_conversion_errors" : data_row_conversion_errors,
-            "batch_to_project_errors" : batch_to_project_errors,
-            "annotation_upload_results" : annotation_upload_results,
-            "annotation_conversion_errors" : annotation_conversion_errors
+            "batch_to_project_results" : batch_to_project_results,
+            "annotation_upload_results" : annotation_upload_results
        }

# def upsert_table_metadata():
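For reference, the returned dictionary can be inspected per step; a minimal sketch, assuming the hypothetical call shown earlier:

results = client.create_data_rows_from_table(table=df, dataset_id="YOUR_DATASET_ID")
print(results["data_row_upload_results"])    # per-data-row upload outcomes
print(results["batch_to_project_results"])   # [] unless a project id / "project_id" column was provided
print(results["annotation_upload_results"])  # [] unless upload_method and an annotation_index were provided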