@@ -75,27 +75,24 @@ def create_data_row(self, **kwargs):
75
75
return self .client ._create (DataRow , kwargs )
76
76
77
77
def create_data_rows (self , items ):
78
-
79
- ## NOTE TODOS
80
- """
81
- Add attachments (works with all types)
82
- Add external ids to bulk imports
83
- improved error handling (why job was accepted or not)
84
- """
85
78
""" Creates multiple DataRow objects based on the given `items`.
86
79
87
80
Each element in `items` can be either a `str` or a `dict`. If
88
81
it is a `str`, then it is interpreted as a local file path. The file
89
82
is uploaded to Labelbox and a DataRow referencing it is created.
90
83
91
84
If an item is a `dict`, then it should follow one of the following two structures:
92
- 1. For static imagery, video, and text it should map `DataRow` fields (or their names) to values.
93
- At the minimum an `item` passed as a `dict` must contain a `DataRow.row_data` key and value.
85
+ 1. For static imagery, video, and text it should map `DataRow` field names to values.
86
+ At a minimum, an `item` passed as a `dict` must contain a `row_data` key and value.
87
+ If the value for row_data is a local file path and the path exists,
88
+ then the local file will be uploaded to Labelbox.
89
+
94
90
2. For tiled imagery the dict must match the import structure specified in the link below
95
91
https://docs.labelbox.com/data-model/en/index-en#tiled-imagery-import
96
92
97
93
>>> dataset.create_data_rows([
98
94
>>> {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"},
95
+ >>> {DataRow.row_data:"/path/to/file1.jpg"},
99
96
>>> "path/to/file2.jpg",
100
97
>>> {"tileLayerUrl" : "http://", ...}
101
98
>>> ])
@@ -123,72 +120,72 @@ def create_data_rows(self, items):
123
120
DataRow = Entity .DataRow
124
121
125
122
def upload_if_necessary (item ):
126
- if isinstance (item , str ):
127
- item_url = self .client .upload_file (item )
128
- item = {DataRow .row_data : item_url , DataRow .external_id : item }
129
- elif isinstance (item , dict ):
130
- if os .path .exists (item ['row_data' ]):
131
- item_url = self .client .upload_file (item ['row_data' ])
132
- parts = {
133
- DataRow .row_data :
134
- item_url ,
135
- DataRow .external_id :
136
- item .get ('external_id' , item ['row_data' ])
137
- }
138
- attachments = item .get ('attachments' )
139
- if attachments :
140
- item = {** parts , ** {'attachments' : attachments }}
141
- else :
142
- item = parts
123
+ row_data = item ['row_data' ]
124
+ if os .path .exists (row_data ):
125
+ item_url = self .client .upload_file (item ['row_data' ])
126
+ item = {
127
+ "row_data" : item_url ,
128
+ "external_id" : item .get ('external_id' , item ['row_data' ]),
129
+ "attachments" : item .get ('attachments' , [])
130
+ }
143
131
return item
144
132
145
133
def validate_attachments (item ):
146
134
attachments = item .get ('attachments' )
147
135
if attachments :
148
136
if isinstance (attachments , list ):
149
137
for attachment in attachments :
150
- for required_key in ['type' , 'value' ]:
151
- if required_key not in attachment :
152
- raise ValueError (
153
- f"Must provide a `{ required_key } ` key for each attachment. Found { attachment } ."
154
- )
155
- attachment_type = attachment .get ('type' )
156
- if attachment_type not in DataRow .supported_attachment_types :
157
- raise ValueError (
158
- f"meta_type must be one of { DataRow .supported_attachment_types } . Found { attachment_type } "
159
- )
138
+ Entity .AssetAttachment .validate_attachment_json (
139
+ attachment )
160
140
else :
161
141
raise ValueError (
162
142
f"Attachments must be a list. Found { type (attachments )} "
163
143
)
164
144
return attachments
165
145
166
- def convert_item (item ):
167
- # Don't make any changes to tms data
168
- validate_attachments (item )
169
- if "tileLayerUrl" in item :
170
- return item
171
-
172
- item = upload_if_necessary (item )
173
- # Convert fields to string names.
174
- item = {
175
- key .name if isinstance (key , Field ) else key : value
176
- for key , value in item .items ()
177
- }
146
+ def format_row (item ):
147
+ # Formats user input into a consistent dict structure
148
+ if isinstance (item , dict ):
149
+ # Convert fields to strings
150
+ item = {
151
+ key .name if isinstance (key , Field ) else key : value
152
+ for key , value in item .items ()
153
+ }
154
+ elif isinstance (item , str ):
155
+ # The main advantage of using a string over a dict is that the user is specifying
156
+ # that the file should exist locally.
157
+ # That info is lost after this section so we should check for it here.
158
+ if not os .path .exists (item ):
159
+ raise ValueError (f"Filepath { item } does not exist." )
160
+ item = {"row_data" : item , "external_id" : item }
161
+ return item
178
162
163
+ def validate_keys (item ):
179
164
if 'row_data' not in item :
180
165
raise InvalidQueryError (
181
166
"`row_data` missing when creating DataRow." )
182
167
183
- # TODO: This is technically breaking. but also idt anyone is using the other fields.
184
168
invalid_keys = set (item ) - {
185
- 'row_data' , 'external_id' , 'attachments'
169
+ * { f . name for f in DataRow . fields ()} , 'attachments'
186
170
}
187
171
if invalid_keys :
188
172
raise InvalidAttributeError (DataRow , invalid_keys )
173
+ return item
174
+
175
+ def convert_item (item ):
176
+ # Don't make any changes to tms data
177
+ if "tileLayerUrl" in item :
178
+ validate_attachments (item )
179
+ return item
180
+ # Convert all payload variations into the same dict format
181
+ item = format_row (item )
182
+ # Make sure required keys exist (and there are no extra keys)
183
+ validate_keys (item )
184
+ # Make sure attachments are valid
185
+ validate_attachments (item )
186
+ # Upload any local file paths
187
+ item = upload_if_necessary (item )
189
188
190
- # Item is valid, convert it to a dict {graphql_field_name: value}
191
- # Need to change the name of DataRow.row_data to "data"
192
189
return {
193
190
"data" if key == "row_data" else utils .camel_case (key ): value
194
191
for key , value in item .items ()
@@ -207,7 +204,8 @@ def convert_item(item):
207
204
query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){
208
205
appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s}
209
206
){ taskId accepted errorMessage } } """ % (dataset_param , url_param ,
210
- dataset_param , url_param )
207
+ dataset_param , url_param )
208
+
211
209
res = self .client .execute (query_str , {
212
210
dataset_param : self .uid ,
213
211
url_param : descriptor_url
0 commit comments