def create_data_rows(
    local_files,
    lb_client,
    row,
    row_data_col,
    global_key_col,
    external_id_col,
    metadata_index,
    metadata_name_key_to_schema,
    metadata_schema_to_name_key,
    divider):
    """ Function to-be-multithreaded to create a data row dictionary from one row of a Pandas table
    Args:
        local_files                     :   Required (bool) - If True, uploads the value in `row_data_col` with `lb_client.upload_file()` and uses the result as row data / If False, uses the value in `row_data_col` as-is
        lb_client                       :   Required (labelbox.client.Client) - Labelbox Client object (only used when `local_files` is True)
        row                             :   Required (pandas.core.series.Series) - Pandas row object
        row_data_col                    :   Required (str) - Column name where the data row row data URL (or local file path) is located
        global_key_col                  :   Required (str) - Column name where the data row global key is located
        external_id_col                 :   Required (str) - Column name where the data row external ID is located
        metadata_index                  :   Required (dict) - Dictionary where {key=column_name : value=metadata_type} - only the keys are read here; may be empty or None
        metadata_name_key_to_schema     :   Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id} - must contain 'lb_integration_source' and every column name in metadata_index
        metadata_schema_to_name_key     :   Required (dict) - Inverse of metadata_name_key_to_schema - not read by this function, kept in the signature so existing callers keep working
        divider                         :   Required (str) - String delimiter to separate metadata field names from their metadata answer options in your metadata_name_key_to_schema dictionary
    Returns:
        Dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys
    """
    raw_row_data = str(row[row_data_col])
    row_data = lb_client.upload_file(raw_row_data) if local_files else raw_row_data
    data_row_dict = {
        "row_data": row_data,
        "global_key": str(row[global_key_col]),
        "external_id": row[external_id_col],
        # Every data row is tagged with its integration source.
        "metadata_fields": [
            {"schema_id": metadata_name_key_to_schema['lb_integration_source'], "value": "Pandas"},
        ],
    }
    if metadata_index:
        for metadata_field_name in metadata_index:
            raw_value = row[metadata_field_name]
            # If "<field><divider><value>" is a known name key, the cell holds an answer
            # option and its schema ID is sent as the value; otherwise the raw cell value is sent.
            name_key = f"{metadata_field_name}{divider}{raw_value}"
            value = metadata_name_key_to_schema.get(name_key, raw_value)
            # The field's schema ID is keyed by field name, so it comes from the
            # name-key -> schema mapping (the inverse mapping is keyed by schema ID).
            data_row_dict['metadata_fields'].append(
                {"schema_id": metadata_name_key_to_schema[metadata_field_name], "value": value}
            )
    return data_row_dict
0 commit comments