|
| 1 | +from labelbox import utils |
1 | 2 | import os
|
2 | 3 | import json
|
3 | 4 | import logging
|
@@ -81,13 +82,17 @@ def create_data_rows(self, items):
|
81 | 82 | is uploaded to Labelbox and a DataRow referencing it is created.
|
82 | 83 |
|
83 | 84 | If an item is a `dict`, then it could support one of the two following structures
|
84 |
| - 1. For static imagery, video, and text it should map `DataRow` fields (or their names) to values. |
85 |
| - At the minimum an `item` passed as a `dict` must contain a `DataRow.row_data` key and value. |
| 85 | + 1. For static imagery, video, and text it should map `DataRow` field names to values. |
| 86 | + At the minimum an `item` passed as a `dict` must contain a `row_data` key and value. |
| 87 | + If the value for row_data is a local file path and the path exists, |
| 88 | + then the local file will be uploaded to labelbox. |
| 89 | +
|
86 | 90 | 2. For tiled imagery the dict must match the import structure specified in the link below
|
87 | 91 | https://docs.labelbox.com/data-model/en/index-en#tiled-imagery-import
|
88 | 92 |
|
89 | 93 | >>> dataset.create_data_rows([
|
90 | 94 | >>> {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"},
|
| 95 | + >>> {DataRow.row_data:"/path/to/file1.jpg"}, |
91 | 96 | >>> "path/to/file2.jpg",
|
92 | 97 | >>> {"tileLayerUrl" : "http://", ...}
|
93 | 98 | >>> ])
|
@@ -115,64 +120,105 @@ def create_data_rows(self, items):
|
115 | 120 | DataRow = Entity.DataRow
|
116 | 121 |
|
117 | 122 | def upload_if_necessary(item):
|
118 |
| - if isinstance(item, str): |
119 |
| - item_url = self.client.upload_file(item) |
120 |
| - # Convert item from str into a dict so it gets processed |
121 |
| - # like all other dicts. |
122 |
| - item = {DataRow.row_data: item_url, DataRow.external_id: item} |
| 123 | + row_data = item['row_data'] |
| 124 | + if os.path.exists(row_data): |
| 125 | + item_url = self.client.upload_file(item['row_data']) |
| 126 | + item = { |
| 127 | + "row_data": item_url, |
| 128 | + "external_id": item.get('external_id', item['row_data']), |
| 129 | + "attachments": item.get('attachments', []) |
| 130 | + } |
123 | 131 | return item
|
124 | 132 |
|
125 |
| - with ThreadPoolExecutor(file_upload_thread_count) as executor: |
126 |
| - futures = [ |
127 |
| - executor.submit(upload_if_necessary, item) for item in items |
128 |
| - ] |
129 |
| - items = [future.result() for future in as_completed(futures)] |
130 |
| - |
131 |
| - def convert_item(item): |
132 |
| - # Don't make any changes to tms data |
133 |
| - if "tileLayerUrl" in item: |
134 |
| - return item |
135 |
| - # Convert string names to fields. |
136 |
| - item = { |
137 |
| - key if isinstance(key, Field) else DataRow.field(key): value |
138 |
| - for key, value in item.items() |
139 |
| - } |
| 133 | + def validate_attachments(item): |
| 134 | + attachments = item.get('attachments') |
| 135 | + if attachments: |
| 136 | + if isinstance(attachments, list): |
| 137 | + for attachment in attachments: |
| 138 | + Entity.AssetAttachment.validate_attachment_json( |
| 139 | + attachment) |
| 140 | + else: |
| 141 | + raise ValueError( |
| 142 | + f"Attachments must be a list. Found {type(attachments)}" |
| 143 | + ) |
| 144 | + return attachments |
| 145 | + |
| 146 | + def format_row(item): |
| 147 | + # Formats user input into a consistent dict structure |
| 148 | + if isinstance(item, dict): |
| 149 | + # Convert fields to strings |
| 150 | + item = { |
| 151 | + key.name if isinstance(key, Field) else key: value |
| 152 | + for key, value in item.items() |
| 153 | + } |
| 154 | + elif isinstance(item, str): |
| 155 | + # The main advantage of using a string over a dict is that the user is specifying |
| 156 | + # that the file should exist locally. |
| 157 | + # That info is lost after this section so we should check for it here. |
| 158 | + if not os.path.exists(item): |
| 159 | + raise ValueError(f"Filepath {item} does not exist.") |
| 160 | + item = {"row_data": item, "external_id": item} |
| 161 | + return item |
140 | 162 |
|
141 |
| - if DataRow.row_data not in item: |
| 163 | + def validate_keys(item): |
| 164 | + if 'row_data' not in item: |
142 | 165 | raise InvalidQueryError(
|
143 |
| - "DataRow.row_data missing when creating DataRow.") |
| 166 | + "`row_data` missing when creating DataRow.") |
144 | 167 |
|
145 |
| - invalid_keys = set(item) - set(DataRow.fields()) |
| 168 | + invalid_keys = set(item) - { |
| 169 | + *{f.name for f in DataRow.fields()}, 'attachments' |
| 170 | + } |
146 | 171 | if invalid_keys:
|
147 | 172 | raise InvalidAttributeError(DataRow, invalid_keys)
|
| 173 | + return item |
| 174 | + |
| 175 | + def convert_item(item): |
| 176 | + # Don't make any changes to tms data |
| 177 | + if "tileLayerUrl" in item: |
| 178 | + validate_attachments(item) |
| 179 | + return item |
| 180 | + # Convert all payload variations into the same dict format |
| 181 | + item = format_row(item) |
| 182 | + # Make sure required keys exist (and there are no extra keys) |
| 183 | + validate_keys(item) |
| 184 | + # Make sure attachments are valid |
| 185 | + validate_attachments(item) |
| 186 | + # Upload any local file paths |
| 187 | + item = upload_if_necessary(item) |
148 | 188 |
|
149 |
| - # Item is valid, convert it to a dict {graphql_field_name: value} |
150 |
| - # Need to change the name of DataRow.row_data to "data" |
151 | 189 | return {
|
152 |
| - "data" if key == DataRow.row_data else key.graphql_name: value |
| 190 | + "data" if key == "row_data" else utils.camel_case(key): value |
153 | 191 | for key, value in item.items()
|
154 | 192 | }
|
155 | 193 |
|
| 194 | + if not isinstance(items, list): |
| 195 | + raise ValueError( |
| 196 | + f"Must pass a list to create_data_rows. Found {type(items)}") |
| 197 | + |
| 198 | + with ThreadPoolExecutor(file_upload_thread_count) as executor: |
| 199 | + futures = [executor.submit(convert_item, item) for item in items] |
| 200 | + items = [future.result() for future in as_completed(futures)] |
| 201 | + |
156 | 202 | # Prepare and upload the descriptor file
|
157 |
| - items = [convert_item(item) for item in items] |
158 | 203 | data = json.dumps(items)
|
159 | 204 | descriptor_url = self.client.upload_data(data)
|
160 |
| - |
161 | 205 | # Create data source
|
162 | 206 | dataset_param = "datasetId"
|
163 | 207 | url_param = "jsonUrl"
|
164 | 208 | query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){
|
165 | 209 | appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s}
|
166 |
| - ){ taskId accepted } } """ % (dataset_param, url_param, |
167 |
| - dataset_param, url_param) |
| 210 | + ){ taskId accepted errorMessage } } """ % (dataset_param, url_param, |
| 211 | + dataset_param, url_param) |
| 212 | + |
168 | 213 | res = self.client.execute(query_str, {
|
169 | 214 | dataset_param: self.uid,
|
170 | 215 | url_param: descriptor_url
|
171 | 216 | })
|
172 | 217 | res = res["appendRowsToDataset"]
|
173 | 218 | if not res["accepted"]:
|
| 219 | + msg = res['errorMessage'] |
174 | 220 | raise InvalidQueryError(
|
175 |
| - "Server did not accept DataRow creation request") |
| 221 | + f"Server did not accept DataRow creation request. {msg}") |
176 | 222 |
|
177 | 223 | # Fetch and return the task.
|
178 | 224 | task_id = res["taskId"]
|
|
0 commit comments