|
| 1 | +from labelbox import utils |
1 | 2 | import os
|
2 | 3 | import json
|
3 | 4 | import logging
|
@@ -74,6 +75,13 @@ def create_data_row(self, **kwargs):
|
74 | 75 | return self.client._create(DataRow, kwargs)
|
75 | 76 |
|
76 | 77 | def create_data_rows(self, items):
|
| 78 | + |
| 79 | + ## NOTE TODOS |
| 80 | + """ |
| 81 | + Add attachments (works with all types) |
| 82 | + Add external ids to bulk imports |
| 83 | + improved error handling (why job was accepted or not) |
| 84 | + """ |
77 | 85 | """ Creates multiple DataRow objects based on the given `items`.
|
78 | 86 |
|
79 | 87 | Each element in `items` can be either a `str` or a `dict`. If
|
@@ -117,47 +125,82 @@ def create_data_rows(self, items):
|
def upload_if_necessary(item):
    """Upload local files referenced by ``item`` and normalize it to a dict.

    A bare ``str`` item is treated as a local file path: the file is
    uploaded and the original path is kept as the external id. A ``dict``
    item whose ``row_data`` is a string naming an existing local file has
    that file uploaded in place (``attachments`` are preserved). Any other
    item is returned unchanged.

    Args:
        item (str | dict): one element of the ``items`` list.

    Returns:
        dict: the (possibly rewritten) data row payload.
    """
    if isinstance(item, str):
        item_url = self.client.upload_file(item)
        # Convert item from str into a dict so it gets processed
        # like all other dicts.
        item = {DataRow.row_data: item_url, DataRow.external_id: item}
    elif isinstance(item, dict):
        # Bug fix: use .get + isinstance guards. A dict keyed with Field
        # objects, or one missing `row_data` entirely, previously raised
        # KeyError here instead of reaching the proper InvalidQueryError
        # raised by downstream validation.
        row_data = item.get('row_data')
        if isinstance(row_data, str) and os.path.exists(row_data):
            item_url = self.client.upload_file(row_data)
            parts = {
                DataRow.row_data: item_url,
                DataRow.external_id: item.get('external_id', row_data)
            }
            attachments = item.get('attachments')
            if attachments:
                item = {**parts, 'attachments': attachments}
            else:
                item = parts
    return item
|
124 | 144 |
|
125 |
| - with ThreadPoolExecutor(file_upload_thread_count) as executor: |
126 |
| - futures = [ |
127 |
| - executor.submit(upload_if_necessary, item) for item in items |
128 |
| - ] |
129 |
| - items = [future.result() for future in as_completed(futures)] |
def validate_attachments(item):
    """Validate the optional ``attachments`` entry of a data row dict.

    Args:
        item (dict): a data row payload which may contain an
            ``attachments`` key holding a list of
            ``{'type': ..., 'value': ...}`` dicts.

    Returns:
        The attachments list if present, otherwise None.

    Raises:
        ValueError: if ``attachments`` is not a list, an attachment is
            missing a required key, or an attachment type is unsupported.
    """
    attachments = item.get('attachments')
    if attachments:
        # Guard clause: fail fast on a non-list before inspecting entries.
        if not isinstance(attachments, list):
            raise ValueError(
                f"Attachments must be a list. Found {type(attachments)}")
        for attachment in attachments:
            for required_key in ['type', 'value']:
                if required_key not in attachment:
                    raise ValueError(
                        f"Must provide a `{required_key}` key for each attachment. Found {attachment}."
                    )
            attachment_type = attachment.get('type')
            if attachment_type not in DataRow.supported_attachment_types:
                # Bug fix: the message previously said `meta_type`, but
                # the key actually being validated is `type`.
                raise ValueError(
                    f"`type` must be one of {DataRow.supported_attachment_types}. Found {attachment_type}"
                )
    return attachments
130 | 165 |
|
def convert_item(item):
    """Normalize a single data row payload into GraphQL upload format.

    Validates attachments, uploads local files when needed, converts
    ``Field`` keys to plain string names, rejects unknown keys, and maps
    ``row_data`` to the GraphQL field name ``data``.
    """
    validate_attachments(item)
    # Tiled-imagery (TMS) payloads pass through untouched.
    if "tileLayerUrl" in item:
        return item

    item = upload_if_necessary(item)

    # Normalize Field-object keys to their plain string names.
    normalized = {}
    for key, value in item.items():
        name = key.name if isinstance(key, Field) else key
        normalized[name] = value

    if 'row_data' not in normalized:
        raise InvalidQueryError("`row_data` missing when creating DataRow.")

    # TODO: This is technically breaking. but also idt anyone is using the other fields.
    unknown = set(normalized) - {'row_data', 'external_id', 'attachments'}
    if unknown:
        raise InvalidAttributeError(DataRow, unknown)

    # Item is valid; emit {graphql_field_name: value}, renaming
    # `row_data` to `data` as the server expects.
    converted = {}
    for key, value in normalized.items():
        graphql_key = "data" if key == "row_data" else utils.camel_case(key)
        converted[graphql_key] = value
    return converted
|
155 | 196 |
|
| 197 | + with ThreadPoolExecutor(file_upload_thread_count) as executor: |
| 198 | + futures = [executor.submit(convert_item, item) for item in items] |
| 199 | + items = [future.result() for future in as_completed(futures)] |
| 200 | + |
156 | 201 | # Prepare and upload the descriptor file
|
157 |
| - items = [convert_item(item) for item in items] |
158 | 202 | data = json.dumps(items)
|
159 | 203 | descriptor_url = self.client.upload_data(data)
|
160 |
| - |
161 | 204 | # Create data source
|
162 | 205 | dataset_param = "datasetId"
|
163 | 206 | url_param = "jsonUrl"
|
|
0 commit comments