Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit e9b6b9f

Browse files
Create client.py
1 parent 1e953f1 commit e9b6b9f

File tree

1 file changed

+98
-0
lines changed

1 file changed

+98
-0
lines changed

labelpandas/client.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd

from labelbox import Client as labelboxClient
from labelbox.schema.dataset import Dataset as labelboxDataset
from labelbase import Client as labelbaseClient
from labelpandas import connector

7+
class Client():
    """Pairs a Labelbox client with a Labelbase client to upload pandas
    DataFrames as Labelbox data rows.

    Args:
        lb_api_key              :   Optional (str) - Labelbox API key
        lb_endpoint             :   Optional (str) - Labelbox GraphQL endpoint
        lb_enable_experimental  :   Optional (bool) - If True, enables experimental Labelbox SDK features
        lb_app_url              :   Optional (str) - Labelbox app URL

    Attributes:
        lb_client               :   labelbox.client.Client object
        base_client             :   labelbase.client.Client object

    Key Functions:
        create_data_rows_from_table : Creates Labelbox data rows (and metadata) given a pandas DataFrame
    """
    def __init__(
        self,
        lb_api_key: str = None,
        lb_endpoint: str = 'https://api.labelbox.com/graphql',
        lb_enable_experimental: bool = False,
        lb_app_url: str = "https://app.labelbox.com"):
        # Labelbox SDK client (GraphQL API access).
        self.lb_client = labelboxClient(lb_api_key, endpoint=lb_endpoint, enable_experimental=lb_enable_experimental, app_url=lb_app_url)
        # Labelbase helper client (metadata sync / batch upload utilities).
        self.base_client = labelbaseClient(lb_api_key, lb_endpoint=lb_endpoint, lb_enable_experimental=lb_enable_experimental, lb_app_url=lb_app_url)

    # def create_table_from_dataset():
    #     return table

    def create_data_rows_from_table(
        self,
        table: pd.core.frame.DataFrame,
        lb_dataset: labelboxDataset,
        row_data_col: str,
        local_files: bool = False,
        global_key_col: str = "",
        external_id_col: str = "",
        metadata_index: dict = None,
        skip_duplicates: bool = False,
        divider: str = "___",
        verbose: bool = False):
        """ Creates Labelbox data rows given a Pandas table and a Labelbox Dataset
        Args:
            table           :   Required (pandas.core.frame.DataFrame) - Pandas dataframe to-be-uploaded
            lb_dataset      :   Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
            row_data_col    :   Required (str) - Column name where the data row row data URL is located
            local_files     :   Required (bool) - If True, will create urls for local files / If False, treats the values in `row_data_col` as urls
            global_key_col  :   Optional (str) - Column name where the data row global key is located - defaults to the row_data column
            external_id_col :   Optional (str) - Column name where the data row external ID is located - defaults to the global_key column
            metadata_index  :   Optional (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type must be one of "enum", "string", "datetime" or "number"
            skip_duplicates :   Optional (bool) - If True, will skip duplicate global_keys, otherwise will generate a unique global_key with a suffix "_1", "_2" and so on
            divider         :   Optional (str) - If skip_duplicates=False, uploader will auto-add a suffix to global keys to create unique ones, where new_global_key=old_global_key+divider+clone_counter
            verbose         :   Required (bool) - If True, prints information about code execution
        Returns:
            List of errors from data row upload - if successful, is an empty list
        """
        # BUG FIX: `metadata_index: dict = {}` was a shared mutable default.
        metadata_index = {} if metadata_index is None else metadata_index
        check = self.base_client.enforce_metadata_index(metadata_index, verbose)
        if not check:
            return None
        # BUG FIX: the original passed three undefined names
        # (get_columns_function / add_column_function / get_unique_values_function),
        # which raised NameError on every call. The `connector` module imported at
        # the top of this file is the intended provider of these table-access
        # helpers — NOTE(review): confirm labelpandas.connector defines them.
        table = self.base_client.sync_metadata_fields(
            table,
            connector.get_columns_function,
            connector.add_column_function,
            connector.get_unique_values_function,
            metadata_index,
            verbose,
        )
        # BUG FIX: `if not table:` raises ValueError on a DataFrame ("truth value
        # of a DataFrame is ambiguous"); sync_metadata_fields signals failure by
        # returning None, so test identity explicitly.
        if table is None:
            return None
        # Fall back to row_data for the global key, then to the global key for
        # the external ID.
        global_key_col = global_key_col if global_key_col else row_data_col
        external_id_col = external_id_col if external_id_col else global_key_col

        metadata_name_key_to_schema = self.base_client.get_metadata_schema_to_name_key(lb_mdo=False, divider=divider, invert=True)

        # Build one upload payload per row in parallel; connector.create_data_rows
        # returns a (global_key, payload) pair.
        global_key_to_upload_dict = {}
        futures = []
        with ThreadPoolExecutor() as exc:
            for _, row in table.iterrows():
                futures.append(exc.submit(
                    connector.create_data_rows, local_files, self.lb_client, row,
                    row_data_col, global_key_col, external_id_col,
                    metadata_name_key_to_schema, metadata_index))
            for f in as_completed(futures):
                res = f.result()
                global_key_to_upload_dict[str(res[0])] = res[1]

        upload_results = self.base_client.batch_create_data_rows(lb_dataset, global_key_to_upload_dict, skip_duplicates, divider)

        return upload_results

    # def upsert_table_metadata():
    #     return table

    # def upsert_labelbox_metadata():
    #     return upload_results

0 commit comments

Comments
 (0)