This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 3b5c351: Update connector.py
Parent: 03d893a

1 file changed: labelpandas/connector.py (+22 additions, -16 deletions)
@@ -1,5 +1,5 @@
 from labelbase.metadata import get_metadata_schema_to_name_key, process_metadata_value
-from labelbox import Client
+from labelbox import Client as labelboxClient
 import pandas
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm.autonotebook import tqdm
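
For reference, the aliased client in the new import would be constructed as in the sketch below. This assumes the standard Labelbox Python SDK, and the API key string is a placeholder.

```python
# Minimal sketch of constructing the client under the aliased name used in connector.py.
# Assumes the standard labelbox SDK; "LB_API_KEY" is a placeholder, not a real key.
from labelbox import Client as labelboxClient

client = labelboxClient(api_key="LB_API_KEY")
```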
@@ -29,16 +29,16 @@ def create_batches(table=pandas.core.frame.DataFrame, global_key_col:str, projec
         project_id_to_batch_dict[project_id].append(data_row_id)
     return project_id_to_batch_dict
 
-def create_annotation_upload_dict():
+def create_annotation_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame):
     return global_key_to_upload_dict, errors
 
-def create_data_row_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Client, row_data_col:str,
-                                global_key_col:str="", external_id_col:str="", metadata_index:dict={}, local_files:bool=False,
-                                divider:str="///", verbose=False):
+def create_data_row_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame, row_data_col:str,
+                                global_key_col:str="", external_id_col:str="", metadata_index:dict={}, attachment_index:dict={},
+                                local_files:bool=False, divider:str="///", verbose=False):
     """ Multithreads over a Pandas DataFrame, calling create_data_rows() on each row to return an upload dictionary
     Args:
         table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
-        lb_client : Required (labelbox.client.Client) - Labelbox Client object
+        client : Required (labelbox.client.Client) - Labelbox Client object
         row_data_col : Required (str) - Column containing asset URL or file path
         global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
         external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
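
A minimal sketch of calling the updated `create_data_row_upload_dict` signature with the new `attachment_index` argument follows. The DataFrame, its column names, and the API key are illustrative assumptions; the import path simply mirrors the `labelpandas/connector.py` file location.

```python
import pandas
from labelbox import Client as labelboxClient
from labelpandas import connector  # assumes the package layout matches labelpandas/connector.py

# Hypothetical DataFrame: one asset URL column, a global key column, and one attachment column.
df = pandas.DataFrame({
    "row_data": ["https://example.com/image-1.jpg"],
    "global_key": ["image-1"],
    "reference_image": ["https://example.com/reference-1.jpg"],
})

client = labelboxClient(api_key="LB_API_KEY")  # placeholder API key

# attachment_index maps a DataFrame column to a Labelbox attachment type.
global_key_to_upload_dict, errors = connector.create_data_row_upload_dict(
    client=client,
    table=df,
    row_data_col="row_data",
    global_key_col="global_key",
    attachment_index={"reference_image": "IMAGE"},
)
```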
@@ -70,14 +70,14 @@ def create_data_row_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Cli
             print(f'Submitting data rows...')
             for index, row in tqdm(table.iterrows()):
                 futures.append(exc.submit(
-                    create_data_rows, lb_client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
-                    row_data_col, global_key_col, external_id_col, metadata_index, local_files, divider
+                    create_data_rows, client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
+                    row_data_col, global_key_col, external_id_col, metadata_index, attachment_index, local_files, divider
                 ))
         else:
             for index, row in table.iterrows():
                 futures.append(exc.submit(
-                    create_data_rows, lb_client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
-                    row_data_col, global_key_col, external_id_col, metadata_index, local_files, divider
+                    create_data_rows, client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
+                    row_data_col, global_key_col, external_id_col, metadata_index, attachment_index, local_files, divider
                 ))
         if verbose:
             print(f'Processing data rows...')
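
The hunk above follows the usual submit-and-collect pattern from `concurrent.futures`; a standalone sketch of that pattern is below. `process_row`, the sample rows, and the result handling are illustrative only and are not part of connector.py.

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def process_row(row):
    # Stand-in for create_data_rows: returns a result dict for one row.
    return {"error": None, "data_row": {"row_data": str(row["row_data"])}}

rows = [{"row_data": "https://example.com/a.jpg"}, {"row_data": "https://example.com/b.jpg"}]
futures = []
with ThreadPoolExecutor() as exc:
    # Submit one task per row, then gather results as they complete.
    for row in rows:
        futures.append(exc.submit(process_row, row))
    for future in as_completed(futures):
        result = future.result()
        if result["error"] is None:
            print(result["data_row"])
```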
@@ -98,12 +98,12 @@ def create_data_row_upload_dict(table:pandas.core.frame.DataFrame, lb_client:Cli
         print(f'Generated upload list - {len(global_key_to_upload_dict)} data rows to upload')
     return global_key_to_upload_dict, errors
 
-def create_data_rows(lb_client:Client, row:pandas.core.series.Series,
+def create_data_rows(client:labelboxClient, row:pandas.core.series.Series,
                      metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict, row_data_col:str,
-                     global_key_col:str, external_id_col:str, metadata_index:dict, local_files:bool, divider:str):
+                     global_key_col:str, external_id_col:str, metadata_index:dict, attachment_index:dict, local_files:bool, divider:str):
     """ Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
     Args:
-        lb_client : Required (labelbox.client.Client) - Labelbox Client object
+        client : Required (labelbox.client.Client) - Labelbox Client object
         row : Required (pandas.core.series.Series) - Pandas Series object, corresponds to one row in a df.iterrow()
         metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
         metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
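
For the two metadata lookup dictionaries named in this docstring, an illustrative shape is sketched below; the schema IDs are placeholders rather than real Labelbox IDs.

```python
# Illustrative shapes only; real schema IDs come from the Labelbox metadata ontology.
metadata_name_key_to_schema = {
    "lb_integration_source": "cjzexample0001schemaid01",
    "capture_date": "cjzexample0002schemaid02",
}
# Inverse mapping: {key=metadata_schema_id : value=metadata_field_name_key}
metadata_schema_to_name_key = {v: k for k, v in metadata_name_key_to_schema.items()}
```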
@@ -112,6 +112,8 @@ def create_data_rows(lb_client:Client, row:pandas.core.series.Series,
         external_id_col : Required (str) - Column name containing the data row external ID
         metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type}
                          metadata_type must be either "enum", "string", "datetime" or "number"
+        attachment_index : Required (dict) - Dictionary where {key=column_name : value=attachment_type}
+                           attachment_type must be one of "IMAGE", "VIDEO", "RAW_TEXT", "HTML", "TEXT_URL"
         local_files : Required (bool) - Determines how to handle row_data_col values
                       If True, treats row_data_col values as file paths and uploads the local files to Labelbox
                       If False, treats row_data_col values as urls (assuming delegated access is set up)
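
To make the two index arguments concrete, a hedged example of what they might look like for a hypothetical DataFrame follows; the column names are assumptions.

```python
# Hypothetical index dictionaries; the keys must be columns in the DataFrame.
metadata_index = {
    "capture_date": "datetime",  # metadata_type: "enum", "string", "datetime" or "number"
    "camera_id": "string",
}
attachment_index = {
    "reference_image": "IMAGE",  # attachment_type: "IMAGE", "VIDEO", "RAW_TEXT", "HTML", "TEXT_URL"
    "notes": "RAW_TEXT",
}
```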
@@ -123,21 +125,25 @@ def create_data_rows(lb_client:Client, row:pandas.core.series.Series,
     """
     return_value = {"error" : None, "data_row" : {}}
     try:
-        return_value["data_row"]["row_data"] = lb_client.upload_file(str(row[row_data_col])) if local_files else str(row[row_data_col])
+        return_value["data_row"]["row_data"] = client.upload_file(str(row[row_data_col])) if local_files else str(row[row_data_col])
         return_value["data_row"]["global_key"] = str(row[global_key_col])
         return_value["data_row"]["external_id"] = str(row[external_id_col])
         metadata_fields = [{"schema_id" : metadata_name_key_to_schema['lb_integration_source'], "value" : "Pandas"}]
         if metadata_index:
             for metadata_field_name in metadata_index.keys():
                 input_metadata = process_metadata_value(
-                    client=lb_client, metadata_value=row[metadata_field_name], metadata_type=metadata_index[metadata_field_name],
+                    client=client, metadata_value=row[metadata_field_name], metadata_type=metadata_index[metadata_field_name],
                     parent_name=metadata_field_name, metadata_name_key_to_schema=metadata_name_key_to_schema, divider=divider
                 )
                 if input_metadata:
                     metadata_fields.append({"schema_id" : metadata_name_key_to_schema[metadata_field_name], "value" : input_metadata})
                 else:
                     continue
-        return_value["data_row"]["metadata_fields"] = metadata_fields
+        return_value["data_row"]["metadata_fields"] = metadata_fields
+        if attachment_index:
+            return_value['data_row']['attachments'] = []
+            for column_name in attachment_index:
+                return_value['data_row']['attachments'].append({"type" : attachment_index[column_name], "value" : row[column_name]})
     except Exception as e:
         return_value["error"] = e
         return_value["data_row"]["global_key"] = str(row[global_key_col])
