from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas
# NOTE(review): the labelbox SDK exports `Client`; aliasing it keeps the
# `labelboxClient` name used in this module's annotations importable.
from labelbox import Client as labelboxClient
from tqdm.autonotebook import tqdm

from labelbase.metadata import get_metadata_schema_to_name_key, process_metadata_value
@@ -29,16 +29,16 @@ def create_batches(table=pandas.core.frame.DataFrame, global_key_col:str, projec
29
29
project_id_to_batch_dict [project_id ].append (data_row_id )
30
30
return project_id_to_batch_dict
31
31
32
def create_annotation_upload_dict(client: labelboxClient, table: pandas.core.frame.DataFrame, ):
    """ Builds an annotation upload dictionary from a Pandas DataFrame.
    NOTE(review): this is an unimplemented stub — `global_key_to_upload_dict`
    and `errors` are never defined in this scope, so calling it raises
    NameError. Presumably the body is still to be written; confirm intent.
    NOTE(review): the trailing comma after `table` suggests more parameters
    were planned for the signature.
    """
    return global_key_to_upload_dict, errors
34
34
35
- def create_data_row_upload_dict (table :pandas .core .frame .DataFrame , lb_client : Client , row_data_col :str ,
36
- global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, local_files : bool = False ,
37
- divider :str = "///" , verbose = False ):
35
+ def create_data_row_upload_dict (client : labelboxClient , table :pandas .core .frame .DataFrame , row_data_col :str ,
36
+ global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, attachment_index : dict = attachment_index
37
+ local_files : bool = False , divider :str = "///" , verbose = False ):
38
38
""" Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
39
39
Args:
40
40
table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
41
- lb_client : Required (labelbox.client.Client) - Labelbox Client object
41
+ client : Required (labelbox.client.Client) - Labelbox Client object
42
42
row_data_col : Required (str) - Column containing asset URL or file path
43
43
global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
44
44
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
@@ -70,14 +70,14 @@ def create_data_row_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Cli
70
70
print (f'Submitting data rows...' )
71
71
for index , row in tqdm (table .iterrows ()):
72
72
futures .append (exc .submit (
73
- create_data_rows , lb_client , row , metadata_name_key_to_schema , metadata_schema_to_name_key ,
74
- row_data_col , global_key_col , external_id_col , metadata_index , local_files , divider
73
+ create_data_rows , client , row , metadata_name_key_to_schema , metadata_schema_to_name_key ,
74
+ row_data_col , global_key_col , external_id_col , metadata_index , attachment_index , local_files , divider
75
75
))
76
76
else :
77
77
for index , row in table .iterrows ():
78
78
futures .append (exc .submit (
79
- create_data_rows , lb_client , row , metadata_name_key_to_schema , metadata_schema_to_name_key ,
80
- row_data_col , global_key_col , external_id_col , metadata_index , local_files , divider
79
+ create_data_rows , client , row , metadata_name_key_to_schema , metadata_schema_to_name_key ,
80
+ row_data_col , global_key_col , external_id_col , metadata_index , attachment_index , local_files , divider
81
81
))
82
82
if verbose :
83
83
print (f'Processing data rows...' )
@@ -98,12 +98,12 @@ def create_data_row_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Cli
98
98
print (f'Generated upload list - { len (global_key_to_upload_dict )} data rows to upload' )
99
99
return global_key_to_upload_dict , errors
100
100
101
def create_data_rows(client: labelboxClient, row: pandas.core.series.Series,
                     metadata_name_key_to_schema: dict, metadata_schema_to_name_key: dict, row_data_col: str,
                     global_key_col: str, external_id_col: str, metadata_index: dict, attachment_index: dict,
                     local_files: bool, divider: str):
    """ Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
    Args:
        client                      :   Required (labelbox.client.Client) - Labelbox Client object
        row                         :   Required (pandas.core.series.Series) - Pandas Series object, corresponds to one row in a df.iterrow()
        metadata_name_key_to_schema :   Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
        metadata_schema_to_name_key :   Required (dict) - Inverse of metadata_name_key_to_schema
        row_data_col                :   Required (str) - Column containing asset URL or file path
        global_key_col              :   Required (str) - Column name containing the data row global key
        external_id_col             :   Required (str) - Column name containing the data row external ID
        metadata_index              :   Required (dict) - Dictionary where {key=column_name : value=metadata_type}
                                        metadata_type must be either "enum", "string", "datetime" or "number"
        attachment_index            :   Required (dict) - Dictionary where {key=column_name : value=attachment_type}
                                        attachment_type must be one of "IMAGE", "VIDEO", "RAW_TEXT", "HTML", "TEXT_URL"
        local_files                 :   Required (bool) - Determines how to handle row_data_col values
                                        If True, treats row_data_col values as file paths and uploads the local files to Labelbox
                                        If False, treats row_data_col values as urls (assuming delegated access is set up)
        divider                     :   Required (str) - Delimiter used for nested metadata name keys
    Returns:
        Dict with two keys:
            "error"    : None on success, otherwise the Exception raised while building the payload
            "data_row" : Labelbox data-row upload dict with row_data / global_key / external_id /
                         metadata_fields and, when attachment_index is non-empty, attachments
    """
    return_value = {"error": None, "data_row": {}}
    try:
        # Local files must be uploaded to Labelbox first; URLs are passed through as-is.
        return_value["data_row"]["row_data"] = client.upload_file(str(row[row_data_col])) if local_files else str(row[row_data_col])
        return_value["data_row"]["global_key"] = str(row[global_key_col])
        return_value["data_row"]["external_id"] = str(row[external_id_col])
        # Every upload is tagged with its integration source as the first metadata field.
        metadata_fields = [{"schema_id": metadata_name_key_to_schema['lb_integration_source'], "value": "Pandas"}]
        for metadata_field_name in metadata_index.keys():
            input_metadata = process_metadata_value(
                client=client, metadata_value=row[metadata_field_name], metadata_type=metadata_index[metadata_field_name],
                parent_name=metadata_field_name, metadata_name_key_to_schema=metadata_name_key_to_schema, divider=divider
            )
            # Falsy results (empty / unparseable metadata values) are skipped rather than uploaded.
            if input_metadata:
                metadata_fields.append({"schema_id": metadata_name_key_to_schema[metadata_field_name], "value": input_metadata})
        return_value["data_row"]["metadata_fields"] = metadata_fields
        if attachment_index:
            return_value["data_row"]["attachments"] = [
                {"type": attachment_index[column_name], "value": row[column_name]}
                for column_name in attachment_index
            ]
    except Exception as e:
        return_value["error"] = e
        # Record which row failed when the global key is still readable; guard the
        # lookup because row[global_key_col] may be the very access that raised.
        try:
            return_value["data_row"]["global_key"] = str(row[global_key_col])
        except Exception:
            pass
    return return_value
0 commit comments