from labelbox import Client as labelboxClient
from labelbase import Client as labelbaseClient
from labelbox.schema.dataset import Dataset as labelboxDataset
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
from labelpandas import connector

class Client():
    """ Wraps a labelbox.Client and a labelbase.Client for uploading Pandas DataFrames to Labelbox
    Args:
        lb_api_key                  :   Required (str) - Labelbox API key
        lb_endpoint                 :   Optional (str) - Labelbox GraphQL endpoint - defaults to "https://api.labelbox.com/graphql"
        lb_enable_experimental      :   Optional (bool) - If True, enables experimental Labelbox SDK features
        lb_app_url                  :   Optional (str) - Labelbox app URL - defaults to "https://app.labelbox.com"

    Attributes:
        lb_client                   :   labelbox.Client object
        base_client                 :   labelbase.Client object

    Key Functions:
        create_table_from_dataset   :   (Not yet implemented) Creates a Pandas DataFrame given a Labelbox dataset
        create_data_rows_from_table :   Creates Labelbox data rows (and metadata) given a Pandas DataFrame
        upsert_table_metadata       :   (Not yet implemented) Updates Pandas DataFrame metadata columns given a Labelbox dataset
        upsert_labelbox_metadata    :   (Not yet implemented) Updates Labelbox metadata given a Pandas DataFrame
    """
    def __init__(
        self,
        lb_api_key:str=None,
        lb_endpoint='https://api.labelbox.com/graphql',
        lb_enable_experimental=False,
        lb_app_url="https://app.labelbox.com"):

        self.lb_client = labelboxClient(lb_api_key, endpoint=lb_endpoint, enable_experimental=lb_enable_experimental, app_url=lb_app_url)
        self.base_client = labelbaseClient(lb_api_key, lb_endpoint=lb_endpoint, lb_enable_experimental=lb_enable_experimental, lb_app_url=lb_app_url)

    # def create_table_from_dataset():
    #     return table

    def create_data_rows_from_table(
        self,
        table:pd.core.frame.DataFrame,
        lb_dataset:labelboxDataset,
        row_data_col:str,
        local_files:bool=False,
        global_key_col:str="",
        external_id_col:str="",
        metadata_index:dict={},
        skip_duplicates:bool=False,
        divider="___",
        verbose:bool=False):
        """ Creates Labelbox data rows given a Pandas DataFrame and a Labelbox Dataset
        Args:
            table               :   Required (pandas.core.frame.DataFrame) - Pandas DataFrame to-be-uploaded
            lb_dataset          :   Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
            row_data_col        :   Required (str) - Column name where the data row's row data (file path / URL) is located
            local_files         :   Optional (bool) - If True, creates URLs for local files; if False, treats the values in `row_data_col` as URLs
            global_key_col      :   Optional (str) - Column name where the data row global key is located - defaults to the row_data column
            external_id_col     :   Optional (str) - Column name where the data row external ID is located - defaults to the global key column
            metadata_index      :   Optional (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type must be one of "enum", "string", "datetime" or "number"
            skip_duplicates     :   Optional (bool) - If True, skips duplicate global keys; otherwise generates a unique global key with a suffix "_1", "_2" and so on
            divider             :   Optional (str) - If skip_duplicates=False, the uploader auto-adds a suffix to duplicate global keys to make them unique, where new_global_key=old_global_key+divider+clone_counter
            verbose             :   Optional (bool) - If True, prints information about code execution
        Returns:
            List of errors from the data row upload - if successful, this is an empty list
        """
        check = self.base_client.enforce_metadata_index(metadata_index, verbose)
        if not check:
            return None
        # The column helper functions passed here are assumed to be supplied by the labelpandas.connector module
        table = self.base_client.sync_metadata_fields(
            table, connector.get_columns_function, connector.add_column_function, connector.get_unique_values_function, metadata_index, verbose
        )
        if table is None: # avoid `if not table:` - the truth value of a DataFrame is ambiguous
            return None
        # Default the global key and external ID columns if they were not provided
        global_key_col = global_key_col if global_key_col else row_data_col
        external_id_col = external_id_col if external_id_col else global_key_col

        metadata_schema_to_name_key = self.base_client.get_metadata_schema_to_name_key(lb_mdo=False, divider=divider, invert=False)
        metadata_name_key_to_schema = self.base_client.get_metadata_schema_to_name_key(lb_mdo=False, divider=divider, invert=True)

        # Build data row upload dicts in parallel, keyed by global key
        global_key_to_upload_dict = {}
        futures = []
        with ThreadPoolExecutor() as exc:
            for index, row in table.iterrows():
                futures.append(exc.submit(connector.create_data_rows, local_files, self.lb_client, row, row_data_col, global_key_col, external_id_col, metadata_name_key_to_schema, metadata_index))
            for f in as_completed(futures):
                res = f.result()
                global_key_to_upload_dict[str(res[0])] = res[1]

        upload_results = self.base_client.batch_create_data_rows(lb_dataset, global_key_to_upload_dict, skip_duplicates, divider)

        return upload_results

    # def upsert_table_metadata():
    #     return table

    # def upsert_labelbox_metadata():
    #     return upload_results
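
# --------------------------------------------------------------------------
# Example usage (illustrative sketch only - not part of the library).
# The API key, dataset name, column names, and metadata field below are
# hypothetical placeholders; the sketch assumes a DataFrame whose "row_data"
# column holds publicly accessible file URLs and a "split" metadata field of
# type "string" in the Labelbox metadata ontology.
#
# if __name__ == "__main__":
#     df = pd.DataFrame({
#         "row_data"   : ["https://example.com/image-1.jpg", "https://example.com/image-2.jpg"],
#         "global_key" : ["image-1", "image-2"],
#         "split"      : ["train", "test"],
#     })
#     client = Client(lb_api_key="YOUR_LABELBOX_API_KEY")
#     dataset = client.lb_client.create_dataset(name="labelpandas-demo")
#     errors = client.create_data_rows_from_table(
#         table=df,
#         lb_dataset=dataset,
#         row_data_col="row_data",
#         global_key_col="global_key",
#         metadata_index={"split": "string"},
#         verbose=True,
#     )
#     print(errors)  # An empty list indicates a successful upload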