|
def create_data_rows(local_files, lb_client, base_client, row, row_data_col, global_key_col, external_id_col, metadata_name_key_to_schema, metadata_index, divider):
    """Build one data row dictionary from a single table row.

    Written to be called once per row (the original notes describe it as a
    function to be multithreaded), so it keeps no state between calls.

    Args:
        local_files                 : Required (bool) - If True, the value in `row_data_col` is passed to
                                      `lb_client.upload_file` and the result is used as the row data;
                                      if False, the value is used as-is (as a string)
        lb_client                   : Required (labelbox.client.Client) - Labelbox Client object; only used when `local_files` is True
        base_client                 : Required (labelbase.client.Client) - Labelbase Client object; accepted for interface
                                      compatibility, not read by this function
        row                         : Required (pandas.core.series.Series) - Row object, indexed by column name
        row_data_col                : Required (str) - Column name where the data row's row data URL (or local path) is located
        global_key_col              : Required (str) - Column name where the data row global key is located
        external_id_col             : Required (str) - Column name where the data row external ID is located
        metadata_name_key_to_schema : Required (dict) - Maps metadata field names, and "<field name><divider><option>" keys,
                                      to schema IDs; must contain the "lb_integration_source" key
        metadata_index              : Required (dict) - Dictionary where {key=column_name : value=metadata_type}; only the keys are
                                      read here. May be empty or None, in which case no per-column metadata is added
        divider                     : Required (str) - String delimiter separating metadata field names from their answer options
                                      in the keys of `metadata_name_key_to_schema`
    Returns:
        Tuple of (global_key, data_row_dict), where data_row_dict has "row_data", "global_key",
        "external_id" and "metadata_fields" keys
    Raises:
        KeyError if a referenced column is missing from `row`, or if "lb_integration_source" or a
        metadata field name is missing from `metadata_name_key_to_schema`
    """
    raw_row_data = str(row[row_data_col])
    row_data = lb_client.upload_file(raw_row_data) if local_files else raw_row_data
    global_key = str(row[global_key_col])
    data_row_dict = {
        "row_data": row_data,
        "global_key": global_key,
        "external_id": row[external_id_col],
        # Every data row is tagged with the integration it was created through.
        "metadata_fields": [
            {"schema_id": metadata_name_key_to_schema["lb_integration_source"], "value": "Pandas"},
        ],
    }
    if metadata_index:
        for metadata_field_name in metadata_index:
            cell_value = row[metadata_field_name]
            name_key = f"{metadata_field_name}{divider}{cell_value}"
            # When "<field><divider><value>" is a known key, the mapped schema ID
            # is sent as the value; otherwise the raw cell value is passed through.
            value = metadata_name_key_to_schema.get(name_key, cell_value)
            data_row_dict["metadata_fields"].append(
                {"schema_id": metadata_name_key_to_schema[metadata_field_name], "value": value}
            )
    return global_key, data_row_dict
0 commit comments