1
1
from labelbox import Client as labelboxClient
2
2
from labelbox .schema .dataset import Dataset as labelboxDataset
3
- from labelbase .metadata import sync_metadata_fields
4
- from labelbase import uploader
5
- import pandas as pd
6
3
from labelpandas import connector
4
+ import labelbase
5
+ import pandas as pd
6
+
7
7
8
8
class Client ():
9
9
"""
@@ -27,71 +27,43 @@ def __init__(
27
27
# return table
28
28
29
29
def create_data_rows_from_table (
30
- self , table :pd .core .frame .DataFrame , lb_dataset :labelboxDataset , row_data_col :str , global_key_col = None , external_id_col = None ,
31
- project_id_col :str = "" , priority = 5 , metadata_index :dict = {}, attachment_index :dict = {}, annotation_index :dict = {}, upload_method :str = "" ,
32
- local_files :bool = False , skip_duplicates :bool = False , verbose :bool = False , divider = "___" ):
30
+ self , table :pd .core .frame .DataFrame , lb_dataset :labelboxDataset , project_id :str = "" , priority = 5 ,
31
+ upload_method :str = "" , skip_duplicates :bool = False , verbose :bool = False , divider = "///" ):
33
32
""" Creates Labelbox data rows given a Pandas table and a Labelbox Dataset
34
33
Args:
35
34
table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
36
- lb_dataset : Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
37
- row_data_col : Required (str) - Column containing asset URL or file path
38
- local_files : Required (bool) - Determines how to handle row_data_col values
39
- If True, treats row_data_col values as file paths uploads the local files to Labelbox
40
- If False, treats row_data_col values as urls (assuming delegated access is set up)
41
- global_key_col : Optional (str) - Column name containing the data row global key - defaults to row_data_col
42
- external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global_key_col
43
- project_id_col : Optional (str) - If provided, batches data rows to project ID in question
44
- priority : Optinoal (int) - Between 1 and 5, what priority to give to data row batches sent to projects
45
- metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type}
46
- metadata_type must be either "enum", "string", "datetime" or "number"
47
- attachment_index : Optional (dict) - Dictionary where {key=column_name : value=attachment_type}
48
- attachment_type must be one of "IMAGE", "VIDEO", "RAW_TEXT", "HTML", "TEXT_URL"
49
- annotation_index : Optional (dict) - Dictionary where {key=column_name : value=annotation_type} -- requires a project_id_col and an upload_method
50
- annotation_type must be one of the following - the format of the cell value must align with annotation type
51
- bbox : [[name_paths], [top, left, height, width], [name_paths], [top, left, height, width]]
52
- polygon : [[name_paths], [(x, y), (x, y),...(x, y)], [name_paths], [(x, y), (x, y),...(x, y)]]
53
- point : [[name_paths], [x, y], [name_paths], [x, y]]
54
- mask : [[name_paths], URL], [name_paths], URL] OR [[name_paths], 2D_ARRAY], [name_paths], 2D_ARRAY]
55
- - URL must be a publicly accessible string
56
- - 2D_ARRAY must be a numpy array where mask values are the color (0,0)
57
- line : [[name_paths], [(x, y), (x, y),...(x, y)], [name_paths], [(x, y), (x, y),...(x, y)]]
58
- ner : [[name_paths], [start, end], [name_paths], [start, end]]
59
- radio : [name_paths]
60
- check : [name_paths]
61
- text : name_path, [text value]
62
- name_paths is parent///child///parent///child - you can specify the delimiter with the `divider` argument
63
- name_paths is passed as a list per annotation, since you can have multiple nested classifications in any given annotation
64
- for tools, you can have multiple annotations of the same class in a given data row, so your input is a list of name_paths and annotation values
35
+ lb_dataset : Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
36
+ project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no project_id column exists
37
+ priority : Optional (int) - Between 1 and 5, what priority to give to data row batches sent to projects
65
38
upload_method : Optional (str) - Either "mal" or "import" - required if an annotation_index is provided
66
39
skip_duplicates : Optional (bool) - Determines how to handle if a global key to-be-uploaded is already in use
67
40
If True, will skip duplicate global_keys and not upload them
68
41
If False, will generate a unique global_key with a suffix {divider} + "1", "2" and so on
69
42
verbose : Required (bool) - If True, prints details about code execution; if False, prints minimal information
70
43
divider : Optional (str) - String delimiter for schema name keys and suffix added to duplicate global keys
71
- Returns:
72
- A dictionary with "upload_results" and "conversion_errors" keys
73
- - "upload_results" key pertains to the results of the data row upload itself
74
- - "conversion_errors" key pertains to any errors related to data row conversion
75
- """
76
-
77
- # Ensure all your metadata_index keys are metadata fields in Labelbox and that your Pandas DataFrame has all the right columns
78
- table = sync_metadata_fields (
79
- client = self .lb_client , table = table , get_columns_function = connector .get_columns_function , add_column_function = connector .add_column_function ,
80
- get_unique_values_function = connector .get_unique_values_function , metadata_index = metadata_index , verbose = verbose
44
+ """
45
+ # Create a metadata_index, attachment_index, and annotation_index
46
+ # row_data_col : column with name "row_data"
47
+ # global_key_col : column with name "global_key" - defaults to row_data_col
48
+ # external_id_col : column with name "external_id" - defaults to global_key_col
49
+ # metadata_index : Dictionary where {key=column_name : value=metadata_type}
50
+ # attachment_index : Dictionary where {key=column_name : value=attachment_type}
51
+ # annotation_index : Dictionary where {key=column_name : value=annotation_type}
52
+ row_data_col , global_key_col , external_id_col , metadata_index , attachment_index , annotation_index = labelbase .validate_columns (
53
+ table = table ,
54
+ get_columns_function = connector .get_columns_function ,
55
+ get_unique_values_function = connector .get_unique_values_function ,
56
+ divider = divider ,
57
+ verbose = verbose ,
58
+ extra_client = None
81
59
)
82
60
83
- # If df returns False, the sync failed - terminate the upload
84
- if type (table ) == bool :
85
- return {
86
- "data_row_upload_results" : [], "data_row_conversion_errors" : [],
87
- "batch_to_project_errors" : [], "annotation_upload_results" : [], "annotation_conversion_errors" : []
88
- }
89
-
90
61
# Create a dictionary where {key=global_key : value=data_row_upload_dict} - this is unique to Pandas
91
62
global_key_to_upload_dict , data_row_conversion_errors = connector .create_data_row_upload_dict (
92
- client = self .lb_client , table = table , row_data_col = row_data_col ,
93
- global_key_col = global_key_col , external_id_col = external_id_col , metadata_index = metadata_index ,
94
- attachment_index = attachment_index , local_files = local_files , divider = divider , verbose = verbose
63
+ client = self .lb_client , table = table ,
64
+ row_data_col = row_data_col , global_key_col = global_key_col , external_id_col = external_id_col ,
65
+ metadata_index = metadata_index , attachment_index = attachment_index ,
66
+ divider = divider , verbose = verbose
95
67
)
96
68
97
69
# If there are conversion errors, let the user know; if there are no successful conversions, terminate the upload
0 commit comments