|
5 | 5 | [
|
6 | 6 | annotation_ndjson,
|
7 | 7 | annotation_ndjson,
|
8 |
| - annotation_ndjson, |
| 8 | + annotation_ndjson |
9 | 9 | ],
|
10 | 10 | project_id :
|
11 | 11 | [
|
12 | 12 | annotation_ndjson,
|
13 | 13 | annotation_ndjson,
|
14 |
| - annotation_ndjson, |
15 |
| - ], |
| 14 | + annotation_ndjson |
| 15 | + ] |
16 | 16 | }
|
17 | 17 | This is the format that labelbase.uploader.batch_upload_annotations() expects
|
18 | 18 | """
|
19 | 19 | import pandas
|
| 20 | +import labelbase |
20 | 21 | from labelbox import Client as labelboxClient
|
| 22 | +from tqdm.autonotebook import tqdm |
| 23 | +from concurrent.futures import ThreadPoolExecutor, as_completed |
21 | 24 |
|
22 |
| -def create_annotation_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame, row_data_col:str, global_key_col:str, |
23 |
| - project_id_col:str, annotation_index:dict, divider:str="///", verbose:bool=False): |
24 |
| - if not annotation_index: |
25 |
| - project_id_to_upload_dict = {} |
26 |
| - errors = f"No annotation index provided - no annotations uploaded" |
27 |
| - else: |
28 |
| - try: |
29 |
| - project_id_to_upload_dict = {project_id : [] for project_id in get_unique_values_function(table, project_id_col)} |
30 |
| - for project_id in project_id_to_upload_dict: |
31 |
| - project_id_to_upload_dict[project_id] = [] |
32 |
| - project_id_to_ontology_index[project_id] = get_ontology_schema_to_name_path( |
33 |
| - ontology=client.get_project(project_id).ontology(), divider=divider, invert=True |
| 25 | +def create_annotation_upload_dict(client:labelboxClient, table:pandas.core.frame.DataFrame, table_dict:dict, |
| 26 | + row_data_col:str, global_key_col:str, project_id_col:str, |
| 27 | + project_id:str, annotation_index:dict, global_key_to_data_row_id:dict, |
| 28 | + divider:str="///", verbose:bool=False): |
| 29 | + """ |
| 30 | + Args: |
| 31 | + client : Required (labelbox.client.Client) - Labelbox Client object |
| 32 | + table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame |
| 33 | + table_dict : Required (dict) - Pandas DataFrame as dict with df.to_dict("records") |
| 34 | + row_data_col : Required (str) - Column containing asset URL or raw text |
| 35 | + global_key_col : Required (str) - Column name containing the data row global key - defaults to row data |
| 36 | + project_id_col : Required (str) - Column name containing the project ID to batch a given row to |
| 37 | + project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no "project_id" column exists |
| 38 | + annotation_index : Required (dict) - Dictionary where {key=column_name : value=annotation_type} |
| 39 | + global_key_to_data_row_id : Required (dict) - Dictionary where {key=global_key : value=data_row_id} |
| 40 | + Returns: |
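| 41 | + project_id_to_upload_dict : (dict) - Dictionary where {key=project_id : value=list of annotation ndjsons} |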
| 42 | + """ |
| 43 | + project_id_to_upload_dict = {project_id : [] for project_id in get_unique_values_function(table, project_id_col)} |
| 44 | + project_id_to_ontology = {} |
| 45 | + for project_id in project_id_to_upload_dict: |
| 46 | + ontology = client.get_project(project_id).ontology() |
| 47 | + project_id_to_ontology[project_id] = { |
| 48 | + "ontology_index" : labelbase.ontology.get_ontology_schema_to_name_path(ontology, divider=divider, invert=True, detailed=True), |
| 49 | + "schema_to_name_path" : labelbase.ontology.get_ontology_schema_to_name_path(ontology, divider=divider, invert=False, detailed=False) |
| 50 | + ) |
| 51 | + if verbose: |
| 52 | + for row_dict in tqdm(table_dict): |
| 53 | + for column_name in annotation_index.keys(): |
| 54 | + ndjsons = create_ndjsons( |
| 55 | + data_row_id = global_key_to_data_row_id[global_key_col], |
| 56 | + top_level_name=annotation_index[column_name], |
| 57 | + annotation_values=row_dict[column_name], |
| 58 | + ontology_index=project_id_to_ontology_index[row[project_id_col]], |
| 59 | + divider=divider |
34 | 60 | )
|
35 |
| - if verbose: |
36 |
| - for index, row in tqdm(table.iterrows()): |
37 |
| - for column_name in annotation_index.keys(): |
38 |
| - ndjsons = create_ndjsons( |
39 |
| - annotation_values=row[column_name], |
40 |
| - annotation_type=annotation_index[column_name], |
41 |
| - ontology_index=project_id_to_ontology_index[row[project_id_col]], |
42 |
| - divide=divider |
43 |
| - ) |
44 |
| - for ndjson in ndjsons: |
45 |
| - project_id_to_upload_dict[row[project_id_col]].append(ndjson) |
46 |
| - for index, row in table.iterrows(): |
47 |
| - for column_name in annotation_index.keys(): |
48 |
| - ndjsons = create_ndjsons( |
49 |
| - annotation_values=row[column_name], |
50 |
| - annotation_type=annotation_index[column_name], |
51 |
| - ontology_index=project_id_to_ontology_index[row[project_id_col]], |
52 |
| - divide=divider |
53 |
| - ) |
54 |
| - for ndjson in ndjsons: |
55 |
| - project_id_to_upload_dict[row[project_id_col]].append(ndjson) |
56 |
| - except Exception as e: |
57 |
| - errors = e |
58 |
| - return project_id_to_upload_dict, errors |
| 61 | + for ndjson in ndjsons: |
| 62 | + project_id_to_upload_dict[row[project_id_col]].append(ndjson) |
| 63 | + for row_dict in table_dict: |
| 64 | + for column_name in annotation_index.keys(): |
| 65 | + ndjsons = create_ndjsons( |
| 66 | + data_row_id = global_key_to_data_row_id[global_key_col], |
| 67 | + top_level_name=annotation_index[column_name], |
| 68 | + annotation_values=row_dict[column_name], |
| 69 | + ontology_index=project_id_to_ontology_index[row[project_id_col]], |
| 70 | + divider=divider |
| 71 | + ) |
| 72 | + for ndjson in ndjsons: |
| 73 | + project_id_to_upload_dict[row[project_id_col]].append(ndjson) |
| 74 | + return project_id_to_upload_dict |
0 commit comments