Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 12197b7

Browse files
Update client.py
1 parent fc6a1a3 commit 12197b7

File tree

1 file changed

+27
-55
lines changed

1 file changed

+27
-55
lines changed

labelpandas/client.py

Lines changed: 27 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from labelbox import Client as labelboxClient
22
from labelbox.schema.dataset import Dataset as labelboxDataset
3-
from labelbase.metadata import sync_metadata_fields
4-
from labelbase import uploader
5-
import pandas as pd
63
from labelpandas import connector
4+
import labelbase
5+
import pandas as pd
6+
77

88
class Client():
99
"""
@@ -27,71 +27,43 @@ def __init__(
2727
# return table
2828

2929
def create_data_rows_from_table(
30-
self, table:pd.core.frame.DataFrame, lb_dataset:labelboxDataset, row_data_col:str, global_key_col=None, external_id_col=None,
31-
project_id_col:str="", priority=5, metadata_index:dict={}, attachment_index:dict={}, annotation_index:dict={}, upload_method:str="",
32-
local_files:bool=False, skip_duplicates:bool=False, verbose:bool=False, divider="___"):
30+
self, table:pd.core.frame.DataFrame, lb_dataset:labelboxDataset, project_id:str="", priority=5,
31+
upload_method:str="", skip_duplicates:bool=False, verbose:bool=False, divider="///"):
3332
""" Creates Labelbox data rows given a Pandas table and a Labelbox Dataset
3433
Args:
3534
table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
36-
lb_dataset : Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
37-
row_data_col : Required (str) - Column containing asset URL or file path
38-
local_files : Required (bool) - Determines how to handle row_data_col values
39-
If True, treats row_data_col values as file paths and uploads the local files to Labelbox
40-
If False, treats row_data_col values as urls (assuming delegated access is set up)
41-
global_key_col : Optional (str) - Column name containing the data row global key - defaults to row_data_col
42-
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global_key_col
43-
project_id_col : Optional (str) - If provided, batches data rows to project ID in question
44-
priority : Optional (int) - Between 1 and 5, what priority to give to data row batches sent to projects
45-
metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type}
46-
metadata_type must be either "enum", "string", "datetime" or "number"
47-
attachment_index : Optional (dict) - Dictionary where {key=column_name : value=attachment_type}
48-
attachment_type must be one of "IMAGE", "VIDEO", "RAW_TEXT", "HTML", "TEXT_URL"
49-
annotation_index : Optional (dict) - Dictionary where {key=column_name : value=annotation_type} -- requires a project_id_col and an upload_method
50-
annotation_type must be one of the following - the format of the cell value must align with annotation type
51-
bbox : [[name_paths], [top, left, height, width], [name_paths], [top, left, height, width]]
52-
polygon : [[name_paths], [(x, y), (x, y),...(x, y)], [name_paths], [(x, y), (x, y),...(x, y)]]
53-
point : [[name_paths], [x, y], [name_paths], [x, y]]
54-
mask : [[name_paths], URL], [name_paths], URL] OR [[name_paths], 2D_ARRAY], [name_paths], 2D_ARRAY]
55-
- URL must be a publicly accessible string
56-
- 2D_ARRAY must be a numpy array where mask values are the color (0,0)
57-
line : [[name_paths], [(x, y), (x, y),...(x, y)], [name_paths], [(x, y), (x, y),...(x, y)]]
58-
ner : [[name_paths], [start, end], [name_paths], [start, end]]
59-
radio : [name_paths]
60-
check : [name_paths]
61-
text : name_path, [text value]
62-
name_paths is parent///child///parent///child - you can specify the delimiter with the `divider` argument
63-
name_paths is passed as a list per annotation, since you can have multiple nested classifications in any given annotation
64-
for tools, you can have multiple annotations of the same class in a given data row, so your input is a list of name_paths and annotation values
35+
lb_dataset : Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
36+
project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no project_id column exists
37+
priority : Optional (int) - Between 1 and 5, what priority to give to data row batches sent to projects
6538
upload_method : Optional (str) - Either "mal" or "import" - required if an annotation_index is provided
6639
skip_duplicates : Optional (bool) - Determines how to handle if a global key to-be-uploaded is already in use
6740
If True, will skip duplicate global_keys and not upload them
6841
If False, will generate a unique global_key with a suffix {divider} + "1", "2" and so on
6942
verbose : Required (bool) - If True, prints details about code execution; if False, prints minimal information
7043
divider : Optional (str) - String delimiter for schema name keys and suffix added to duplicate global keys
71-
Returns:
72-
A dictionary with "upload_results" and "conversion_errors" keys
73-
- "upload_results" key pertains to the results of the data row upload itself
74-
- "conversion_errors" key pertains to any errors related to data row conversion
75-
"""
76-
77-
# Ensure all your metadata_index keys are metadata fields in Labelbox and that your Pandas DataFrame has all the right columns
78-
table = sync_metadata_fields(
79-
client=self.lb_client, table=table, get_columns_function=connector.get_columns_function, add_column_function=connector.add_column_function,
80-
get_unique_values_function=connector.get_unique_values_function, metadata_index=metadata_index, verbose=verbose
44+
"""
45+
# Create a metadata_index, attachment_index, and annotation_index
46+
# row_data_col : column with name "row_data"
47+
# global_key_col : column with name "global_key" - defaults to row_data_col
48+
# external_id_col : column with name "external_id" - defaults to global_key_col
49+
# metadata_index : Dictionary where {key=column_name : value=metadata_type}
50+
# attachment_index : Dictionary where {key=column_name : value=attachment_type}
51+
# annotation_index : Dictionary where {key=column_name : value=annotation_type}
52+
row_data_col, global_key_col, external_id_col, metadata_index, attachment_index, annotation_index = labelbase.validate_columns(
53+
table=table,
54+
get_columns_function=connector.get_columns_function,
55+
get_unique_values_function=connector.get_unique_values_function,
56+
divider=divider,
57+
verbose=verbose,
58+
extra_client=None
8159
)
8260

83-
# If df returns False, the sync failed - terminate the upload
84-
if type(table) == bool:
85-
return {
86-
"data_row_upload_results" : [], "data_row_conversion_errors" : [],
87-
"batch_to_project_errors" : [], "annotation_upload_results" : [], "annotation_conversion_errors" : []
88-
}
89-
9061
# Create a dictionary where {key=global_key : value=data_row_upload_dict} - this is unique to Pandas
9162
global_key_to_upload_dict, data_row_conversion_errors = connector.create_data_row_upload_dict(
92-
client=self.lb_client, table=table, row_data_col=row_data_col,
93-
global_key_col=global_key_col, external_id_col=external_id_col, metadata_index=metadata_index,
94-
attachment_index=attachment_index, local_files=local_files, divider=divider, verbose=verbose
63+
client=self.lb_client, table=table,
64+
row_data_col=row_data_col, global_key_col=global_key_col, external_id_col=external_id_col,
65+
metadata_index=metadata_index, attachment_index=attachment_index,
66+
divider=divider, verbose=verbose
9567
)
9668

9769
# If there are conversion errors, let the user know; if there are no successful conversions, terminate the upload

0 commit comments

Comments
 (0)