This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit a8e5da9

Update client.py

1 parent a5f5f2f commit a8e5da9

File tree

1 file changed: +63, -54 lines changed

labelpandas/client.py

Lines changed: 63 additions & 54 deletions
@@ -1,6 +1,6 @@
 from labelbox import Client as labelboxClient
 from labelbox.schema.dataset import Dataset as labelboxDataset
-from labelpandas import connector
+import labelpandas
 import labelbase
 import pandas as pd
 
@@ -27,15 +27,15 @@ def __init__(
         # return table
 
     def create_data_rows_from_table(
-        self, table:pd.core.frame.DataFrame, lb_dataset:labelboxDataset, project_id:str="", priority=5,
+        self, table:pd.core.frame.DataFrame, dataset_id:str="", project_id:str="", priority:int=5,
         upload_method:str="", skip_duplicates:bool=False, verbose:bool=False, divider="///"):
         """ Creates Labelbox data rows given a Pandas table and a Labelbox Dataset
         Args:
             table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
-            lb_dataset : Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
-            project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no project_id column exists
+            dataset_id : Required (str) - Labelbox dataset ID to add data rows to - only necessary if no "dataset_id" column exists
+            project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no "project_id" column exists
             priority : Optional (int) - Between 1 and 5, what priority to give to data row batches sent to projects
-            upload_method : Optional (str) - Either "mal" or "import" - required if an annotation_index is provided
+            upload_method : Optional (str) - Either "mal" or "import" - required to upload annotations (otherwise leave as "")
             skip_duplicates : Optional (bool) - Determines how to handle if a global key to-be-uploaded is already in use
                 If True, will skip duplicate global_keys and not upload them
                 If False, will generate a unique global_key with a suffix {divider} + "1", "2" and so on
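Note (editor's sketch, not part of the commit): with this change the method takes a dataset ID string instead of a labelbox Dataset object. A minimal, hypothetical call under the new signature could look like the following; the constructor argument to labelpandas.Client, the IDs, and the column values are placeholders assumed from the surrounding code, not confirmed by this diff:

    import labelpandas as lp
    import pandas as pd

    client = lp.Client("LB_API_KEY")  # assumed constructor; supply your Labelbox API key

    df = pd.DataFrame({
        "row_data":   ["https://example.com/image-1.jpg"],  # asset to create a data row from
        "global_key": ["image-1"],                          # unique key for the data row
    })

    results = client.create_data_rows_from_table(
        table=df,
        dataset_id="YOUR_DATASET_ID",  # required here because df has no "dataset_id" column
        upload_method="",              # "" = data rows only; "mal" or "import" to also upload annotations
        skip_duplicates=True,
        verbose=True,
    )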
@@ -46,10 +46,12 @@ def create_data_rows_from_table(
         # row_data_col : column with name "row_data"
         # global_key_col : column with name "global_key" - defaults to row_data_col
         # external_id_col : column with name "external_id" - defaults to global_key_col
+        # project_id_col : column with name "project_id" - defaults to "" (requires project_id input argument if no "project_id" column exists)
+        # dataset_id_col : column with name "dataset_id" - defaults to "" (requires dataset_id input argument if no "dataset_id" column exists)
         # metadata_index : Dictionary where {key=column_name : value=metadata_type}
         # attachment_index : Dictionary where {key=column_name : value=attachment_type}
         # annotation_index : Dictionary where {key=column_name : value=annotation_type}
-        row_data_col, global_key_col, external_id_col, metadata_index, attachment_index, annotation_index = labelbase.connector.validate_columns(
+        row_data_col, global_key_col, external_id_col, project_id_col, dataset_id_col, metadata_index, attachment_index, annotation_index = labelbase.connector.validate_columns(
             table=table,
-            get_columns_function=connector.get_columns_function,
-            get_unique_values_function=connector.get_unique_values_function,
+            get_columns_function=labelpandas.connector.get_columns_function,
+            get_unique_values_function=labelpandas.connector.get_unique_values_function,
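Note (editor's sketch, not part of the commit): the index arguments validated above are plain dictionaries mapping DataFrame column names to type strings, per the comments in this hunk. The column names and type values below are hypothetical; the exact type strings accepted are defined by labelbase.connector.validate_columns:

    # {key=column_name : value=metadata_type} - e.g. a "split" column carrying string metadata
    metadata_index = {"split": "string"}

    # {key=column_name : value=attachment_type} - e.g. a column of image attachment URLs
    attachment_index = {"reference_image": "IMAGE"}

    # {key=column_name : value=annotation_type} - e.g. a column of bounding-box annotations
    annotation_index = {"objects": "bbox"}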
@@ -58,69 +60,75 @@ def create_data_rows_from_table(
             extra_client=None
         )
 
-        # Create a dictionary where {key=global_key : value=data_row_upload_dict} - this is unique to Pandas
-        global_key_to_upload_dict, data_row_conversion_errors = connector.create_data_row_upload_dict(
-            client=self.lb_client, table=table,
-            row_data_col=row_data_col, global_key_col=global_key_col, external_id_col=external_id_col,
-            metadata_index=metadata_index, attachment_index=attachment_index,
-            divider=divider, verbose=verbose
-        )
+        # Iterating over your pandas DataFrame is faster once converted to a list of dictionaries where {key=column_name : value=row_value}
+        table_dict = table.to_dict('records')
 
-        # If there are conversion errors, let the user know; if there are no successful conversions, terminate the upload
-        if data_row_conversion_errors:
-            print(f'There were {len(data_row_conversion_errors)} errors in creating your upload list - see result["data_row_conversion_errors"] for more information')
-            if global_key_to_upload_dict:
-                print(f'Data row upload will continue')
-            else:
-                print(f'Data row upload will not continue')
-                return {
-                    "data_row_upload_results" : [], "data_row_conversion_errors" : data_row_conversion_errors,
-                    "batch_to_project_errors" : [], "annotation_upload_results" : [], "annotation_conversion_errors" : []
-                }
+        if (dataset_id_col=="") and (dataset_id==""):
+            raise ValueError(f"To create data rows, please provide either a `dataset_id` column or a Labelbox dataset id to argument `dataset_id`")
+
+        if (upload_method!="") and (project_id_col=="") and (project_id=="") and (annotation_index!={}):
+            raise ValueError(f"To upload annotations, please provide either a `project_id` column or a Labelbox project id to argument `project_id`")
+
+        # Create a dictionary where {key=dataset_id : value={key=global_key : value=data_row_upload_dict}} - this is unique to Pandas
+        dataset_to_global_key_to_upload_dict = labelpandas.data_rows.create_data_row_upload_dict(
+            client=self.lb_client, table=table, table_dict=table_dict,
+            row_data_col=row_data_col, global_key_col=global_key_col, external_id_col=external_id_col, dataset_id_col=dataset_id_col,
+            dataset_id=dataset_id, metadata_index=metadata_index, attachment_index=attachment_index,
+            divider=divider, verbose=verbose, extra_client=None
+        )
 
         # Upload your data rows to Labelbox
         data_row_upload_results = labelbase.uploader.batch_create_data_rows(
-            client=self.lb_client, dataset=lb_dataset, global_key_to_upload_dict=global_key_to_upload_dict,
+            client=self.lb_client, dataset_to_global_key_to_upload_dict=dataset_to_global_key_to_upload_dict,
             skip_duplicates=skip_duplicates, divider=divider, verbose=verbose
         )
 
-        # Default global_key_col if row_data_col not provided
-        global_key_col = global_key_col if global_key_col else row_data_col
-
-        # Create a dictionary where {key=global_key : value=data_row_id}
-        global_key_to_data_row_id = labelbase.uploader.create_global_key_to_data_row_dict(
-            client=self.lb_client, global_keys=labelpandas.connector.get_unique_values_function(table, global_key_col)
-        )
-
-        # Create a dictionary where {key=project_id : value=list_of_data_row_ids}, if applicable
-        project_id_to_batch_dict, batch_to_project_errors = labelpandas.connector.create_batches(
-            client=self.lb_client, table=table, global_key_col=global_key_col,
-            project_id_col=project_id_col, global_key_to_data_row_id=global_key_to_data_row_id
-        )
+        # If project ids are provided, we can batch data rows to projects
+        if project_id or project_id_col:
+
+            # Create a dictionary where {key=global_key : value=data_row_id}
+            global_key_to_data_row_id = labelbase.uploader.create_global_key_to_data_row_dict(
+                client=self.lb_client, global_keys=labelpandas.connector.get_unique_values_function(table, global_key_col)
+            )
+
+            # Create a dictionary where {key=project_id : value=list_of_data_row_ids}, if applicable
+            project_id_to_batch_dict = labelpandas.batches.create_batches_dict(
+                client=self.lb_client, table=table, table_dict=table_dict,
+                global_key_col=global_key_col, project_id_col=project_id_col,
+                global_key_to_data_row_id=global_key_to_data_row_id
+            )
 
-        # Batch data rows to projects, if applicable
-        if not batch_to_project_errors:
-            batch_to_project_errors = labelbase.uploader.batch_rows_to_project(
+            # Batch data rows to projects, if applicable
+            batch_to_project_results = labelbase.uploader.batch_rows_to_project(
                 client=self.lb_client, project_id_to_batch_dict=project_id_to_batch_dict, priority=priority
             )
 
-        # Create a dictionary where {key=project_id : value=annotation_upload_list}, if applicable
-        project_id_to_upload_dict, annotation_conversion_errors = labelpandas.connector.create_annotation_upload_dict(
-            client=self.lb_client, table=table, row_data_col=row_data_col, global_key_col=global_key_col,
-            project_id_col=project_id_col, annotation_index=annotation_index, divider=divider, verbose=verbose
-        )
+            if (upload_method in ["mal", "import"]) and (annotation_index!={}):
+
+                # Create a dictionary where {key=project_id : value=annotation_upload_list}, if applicable
+                project_id_to_upload_dict = labelpandas.connector.create_annotation_upload_dict(
+                    client=self.lb_client, table=table, table_dict=table_dict,
+                    row_data_col=row_data_col, global_key_col=global_key_col, project_id_col=project_id_col,
+                    annotation_index=annotation_index, divider=divider, verbose=verbose
+                )
+
+                # Upload your annotations to Labelbox, if applicable
+                annotation_upload_results = labelbase.uploader.batch_upload_annotations(
+                    client=self.lb_client, project_id_to_upload_dict=project_id_to_upload_dict, how=upload_method, verbose=verbose
+                )
+
+            else:
+                annotation_upload_results = []
 
-        # Upload your annotations to Labelbox, if applicable
-        annotation_upload_results = labelbase.uploader.batch_upload_annotations(
-            client=self.lb_client, project_id_to_upload_dict=project_id_to_upload_dict, how=upload_method, verbose=verbose
-        )
+        else:
+            batch_to_project_results = []
+            annotation_upload_results = []
 
         return {
             "data_row_upload_results" : data_row_upload_results,
-            "data_row_conversion_errors" : data_row_conversion_errors,
-            "batch_to_project_errors" : batch_to_project_errors,
-            "annotation_upload_results" : annotation_upload_results,
-            "annotation_conversion_errors" : annotation_conversion_errors
+            "batch_to_project_results" : batch_to_project_results,
+            "annotation_upload_results" : annotation_upload_results
         }
 
     # def upsert_table_metadata():
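Note (editor's sketch, not part of the commit): the return payload now has three keys instead of five, since the conversion-error lists were dropped. A hypothetical consumer of the result, reusing the `client` and `df` placeholders from the earlier sketch:

    results = client.create_data_rows_from_table(table=df, dataset_id="YOUR_DATASET_ID")

    # Always populated: one result per dataset upload job (exact element type is defined by labelbase)
    print(results["data_row_upload_results"])

    # Empty list unless a project_id (column or argument) was supplied:
    print(results["batch_to_project_results"])

    # Empty list unless upload_method was "mal" or "import" and an annotation_index was provided:
    print(results["annotation_upload_results"])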
