This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 7fb1366

Update data_rows.py
1 parent fa81989 commit 7fb1366

1 file changed: +18 -20 lines changed

labelpandas/data_rows.py

Lines changed: 18 additions & 20 deletions
@@ -38,26 +38,18 @@ def create_data_row_upload_dict(client:labelboxClient, table:pd.core.frame.DataF
         dataset_to_global_key_to_upload_dict = {dataset_id : {}}
     else:
         dataset_to_global_key_to_upload_dict = {id : {} for id in connector.get_unique_values_function(table=table)}
+    df_dict = df.to_dict('records')
     with ThreadPoolExecutor(max_workers=8) as exc:
-        global_key_to_upload_dict = {}
         errors = []
         futures = []
         if verbose:
             print(f'Submitting data rows...')
-            for index, row in tqdm(table.iterrows()):
+            for index, row_dict in tqdm(df_dict):
                 futures.append(exc.submit(
-                    create_data_rows, client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
+                    create_data_rows, client, row_dict, metadata_name_key_to_schema, metadata_schema_to_name_key,
                     row_data_col, global_key_col, external_id_col, dataset_id_col,
                     dataset_id, metadata_index, attachment_index, divider
                 ))
-        else:
-            for index, row in table.iterrows():
-                futures.append(exc.submit(
-                    create_data_rows, client, row, metadata_name_key_to_schema, metadata_schema_to_name_key,
-                    row_data_col, global_key_col, external_id_col, dataset_id_col,
-                    dataset_id, metadata_index, attachment_index, divider
-                ))
-        if verbose:
             print(f'Processing data rows...')
             for f in tqdm(as_completed(futures)):
                 res = f.result()
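For readers skimming the hunk above: the commit swaps per-row pandas Series objects from table.iterrows() for plain dictionaries produced by DataFrame.to_dict('records'), then submits each one to a thread pool. Below is a minimal, standalone sketch of that pattern, not the repository's code; build_row() and the example columns are hypothetical, and enumerate() is used because to_dict('records') returns a plain list of dicts.

```python
# Minimal sketch (not the repository's code) of the iterrows() -> to_dict('records')
# switch shown above: each DataFrame row becomes a plain dict keyed by column name,
# and every dict is submitted to a thread pool. build_row() is a hypothetical worker.
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
from tqdm import tqdm

def build_row(row_dict: dict) -> dict:
    # Stand-in for create_data_rows(): read values by column name.
    return {"row_data": str(row_dict["row_data"]), "global_key": str(row_dict["global_key"])}

table = pd.DataFrame({
    "row_data": ["https://example.com/a.jpg", "https://example.com/b.jpg"],
    "global_key": ["asset-a", "asset-b"],
})

records = table.to_dict("records")  # list of {column_name: value} dicts

futures = []
with ThreadPoolExecutor(max_workers=8) as exc:
    # to_dict('records') yields a plain list, so enumerate() (rather than
    # DataFrame.iterrows()) is used here to keep an index next to each record.
    for index, row_dict in tqdm(enumerate(records), total=len(records)):
        futures.append(exc.submit(build_row, row_dict))
    results = [f.result() for f in as_completed(futures)]

print(results)
```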
@@ -67,8 +59,14 @@ def create_data_row_upload_dict(client:labelboxClient, table:pd.core.frame.DataF
                     id = str(list(res.keys()))[0]
                     data_row_dict = res["res"][id]
                     global_key = str(data_row_dict["global_key"])
-                    dataset_to_global_key_to_upload_dict[id].update({global_key:data_row_dict})
+                    dataset_to_global_key_to_upload_dict[id].update({global_key:data_row_dict})
         else:
+            for index, row in table.iterrows():
+                futures.append(exc.submit(
+                    create_data_rows, client, row_dict, metadata_name_key_to_schema, metadata_schema_to_name_key,
+                    row_data_col, global_key_col, external_id_col, dataset_id_col,
+                    dataset_id, metadata_index, attachment_index, divider
+                ))
             for f in as_completed(futures):
                 res = f.result()
                 if res['error']:
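The same hunk shows how results are drained with as_completed() and routed into a per-dataset dictionary keyed by global key. A hedged sketch of that collection step follows; fake_worker() and the {"error": ..., "res": {dataset_id: data_row_dict}} result shape are stand-ins mirroring the diff, not the library's API.

```python
# Sketch of the result-collection step (assumption: each future returns
# {"error": ..., "res": {dataset_id: data_row_dict}}, mirroring the diff above).
from concurrent.futures import ThreadPoolExecutor, as_completed

def fake_worker(dataset_id: str, global_key: str) -> dict:
    # Hypothetical stand-in for create_data_rows()
    return {"error": None,
            "res": {dataset_id: {"global_key": global_key,
                                 "row_data": f"https://example.com/{global_key}.jpg"}}}

dataset_to_global_key_to_upload_dict = {"ds-1": {}}
errors = []

with ThreadPoolExecutor(max_workers=8) as exc:
    futures = [exc.submit(fake_worker, "ds-1", gk) for gk in ("asset-a", "asset-b")]
    for f in as_completed(futures):
        res = f.result()
        if res["error"]:
            errors.append(res)
        else:
            dataset_id = list(res["res"].keys())[0]  # single dataset key per result
            data_row_dict = res["res"][dataset_id]
            global_key = str(data_row_dict["global_key"])
            dataset_to_global_key_to_upload_dict[dataset_id].update({global_key: data_row_dict})

print(dataset_to_global_key_to_upload_dict)
```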
@@ -82,15 +80,15 @@ def create_data_row_upload_dict(client:labelboxClient, table:pd.core.frame.DataF
         print(f'Generated upload list')
     return global_key_to_upload_dict, errors
 
-def create_data_rows(client:labelboxClient, row:pandas.core.series.Series,
+def create_data_rows(client:labelboxClient, row_dict:dict,
                      metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict,
                      row_data_col:str, global_key_col:str, external_id_col:str, dataset_id_col:str,
                      dataset_id:str, metadata_index:dict, attachment_index:dict,
                      divider:str):
     """ Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
     Args:
         client : Required (labelbox.client.Client) - Labelbox Client object
-        row : Required (pandas.core.series.Series) - Pandas Series object, corresponds to one row in a df.iterrow()
+        row_dict : Required (dict) - Dictionary where {key=column_name : value=row_value}
         metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
         metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
         row_data_col : Required (str) - Column containing asset URL or raw text
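To make the docstring change above concrete, here is a small illustration (hypothetical columns) of how a row surfaces under the old and new signatures: a pandas Series from iterrows() versus a plain {column_name: value} dict from to_dict('records').

```python
import pandas as pd

table = pd.DataFrame({"row_data": ["https://example.com/a.jpg"], "global_key": ["asset-a"]})

_, row_series = next(table.iterrows())    # old style: pandas.core.series.Series, indexed by column name
row_dict = table.to_dict("records")[0]    # new style: plain dict, {column_name: value}

print(type(row_series).__name__)          # Series
print(row_dict["row_data"])               # https://example.com/a.jpg
print(row_dict)                           # {'row_data': 'https://example.com/a.jpg', 'global_key': 'asset-a'}
```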
@@ -108,16 +106,16 @@ def create_data_rows(client:labelboxClient, row:pandas.core.series.Series,
     """
     return_value = {"error" : None, "res" : {}}
     try:
-        id = dataset_id if dataset_id else row["dataset_id_col"]
+        id = dataset_id if dataset_id else row_dict["dataset_id_col"]
         return_value["res"] = {id : {}}
-        return_value["res"][id]["row_data"] = str(row[row_data_col])
-        return_value["res"][id]["global_key"] = str(row[global_key_col])
-        return_value["res"][id]["external_id"] = str(row[external_id_col])
+        return_value["res"][id]["row_data"] = str(row_dict[row_data_col])
+        return_value["res"][id]["global_key"] = str(row_dict[global_key_col])
+        return_value["res"][id]["external_id"] = str(row_dict[external_id_col])
         metadata_fields = [{"schema_id" : metadata_name_key_to_schema['lb_integration_source'], "value" : "Pandas"}]
         if metadata_index:
             for metadata_field_name in metadata_index.keys():
                 input_metadata = labelbase.metadata.process_metadata_value(
-                    client=client, metadata_value=row[metadata_field_name], metadata_type=metadata_index[metadata_field_name],
+                    client=client, metadata_value=row_dict[metadata_field_name], metadata_type=metadata_index[metadata_field_name],
                     parent_name=metadata_field_name, metadata_name_key_to_schema=metadata_name_key_to_schema, divider=divider
                 )
                 if input_metadata:
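The hunk above rewrites the payload assembly to read values out of row_dict. The sketch below reproduces that shape under stated assumptions: resolve_metadata(), the column names, and the "metadata_fields" key are illustrative stand-ins, and the real code resolves metadata through labelbase.metadata.process_metadata_value() rather than a local helper.

```python
# Sketch of the per-row payload assembly after the switch to dict lookups.
# Column names, the fallback dataset id, and resolve_metadata() are illustrative.
def resolve_metadata(name: str, value) -> dict:
    # Hypothetical stand-in for the labelbase metadata-resolution step.
    return {"name": name, "value": value}

def build_payload(row_dict: dict, dataset_id: str, row_data_col: str,
                  global_key_col: str, external_id_col: str, metadata_index: dict) -> dict:
    return_value = {"error": None, "res": {}}
    try:
        id = dataset_id if dataset_id else row_dict["dataset_id"]
        return_value["res"] = {id: {}}
        return_value["res"][id]["row_data"] = str(row_dict[row_data_col])
        return_value["res"][id]["global_key"] = str(row_dict[global_key_col])
        return_value["res"][id]["external_id"] = str(row_dict[external_id_col])
        metadata_fields = []
        for metadata_field_name in metadata_index:
            metadata_fields.append(resolve_metadata(metadata_field_name, row_dict[metadata_field_name]))
        return_value["res"][id]["metadata_fields"] = metadata_fields
    except Exception as e:
        return_value["error"] = e
    return return_value

row = {"row_data": "https://example.com/a.jpg", "global_key": "asset-a",
       "external_id": "a", "split": "train"}
print(build_payload(row, "ds-1", "row_data", "global_key", "external_id", {"split": "string"}))
```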
@@ -128,7 +126,7 @@ def create_data_rows(client:labelboxClient, row:pandas.core.series.Series,
         if attachment_index:
             return_value["res"][id]["attachments"] = []
             for column_name in attachment_index:
-                return_value["res"][id]['attachments'].append({"type" : attachment_index[column_name], "value" : row[column_name]})
+                return_value["res"][id]['attachments'].append({"type" : attachment_index[column_name], "value" : row_dict[column_name]})
     except Exception as e:
         return_value["error"] = e
     return return_value
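Finally, the last hunk only changes where the attachment value is read from. A tiny illustration of that loop, with made-up column names and attachment types:

```python
# attachment_index maps a column name to an attachment type; the value is read
# from the same row_dict. Column names and types here are illustrative only.
attachment_index = {"reference_image": "IMAGE", "notes": "TEXT"}
row_dict = {"reference_image": "https://example.com/ref.jpg", "notes": "flagged for review"}

attachments = []
for column_name in attachment_index:
    attachments.append({"type": attachment_index[column_name], "value": row_dict[column_name]})

print(attachments)
# [{'type': 'IMAGE', 'value': 'https://example.com/ref.jpg'}, {'type': 'TEXT', 'value': 'flagged for review'}]
```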
