@@ -137,20 +137,17 @@ class LBV1Label(BaseModel):
137
137
label_url : Optional [str ] = Extra ('View Label' )
138
138
has_open_issues : Optional [float ] = Extra ('Has Open Issues' )
139
139
skipped : Optional [bool ] = Extra ('Skipped' )
140
+ media_type : Optional [str ] = Extra ('media_type' )
140
141
141
142
def to_common (self ) -> Label :
142
143
if isinstance (self .label , list ):
143
144
annotations = []
144
145
for lbl in self .label :
145
146
annotations .extend (lbl .to_common ())
146
- data = VideoData (url = self .row_data ,
147
- external_id = self .external_id ,
148
- uid = self .data_row_id )
149
147
else :
150
148
annotations = self .label .to_common ()
151
- data = self ._infer_media_type ()
152
149
153
- return Label (data = data ,
150
+ return Label (data = self . _data_row_to_common () ,
154
151
uid = self .id ,
155
152
annotations = annotations ,
156
153
extra = {
@@ -174,44 +171,49 @@ def from_common(cls, label: Label):
174
171
external_id = label .data .external_id ,
175
172
** label .extra )
176
173
177
- def _infer_media_type (self ):
178
- # Video annotations are formatted differently from text and images
179
- # So we only need to differentiate those two
174
def _data_row_to_common(self) -> Union[ImageData, TextData, VideoData]:
    """Build the common data object that matches this label's media type.

    The row content is passed as ``url`` when ``_is_url()`` is true and as
    ``text`` otherwise. When ``media_type`` is not already set it is
    inferred via ``_infer_media_type()`` and stored back on the instance.

    Returns:
        A ``TextData``, ``ImageData`` or ``VideoData`` built from the row
        content, ``external_id`` and ``data_row_id``.

    Raises:
        ValueError: If the resolved media type is not 'text', 'image'
            or 'video'.
    """
    # Remote content and inline content use different keyword arguments.
    if self._is_url():
        content_key = 'url'
    else:
        content_key = 'text'
    data_kwargs = {
        content_key: self.row_data,
        'external_id': self.external_id,
        'uid': self.data_row_id,
    }

    # An explicitly provided media type wins; otherwise fall back to
    # inference (which may itself raise if nothing can be inferred).
    self.media_type = self.media_type or self._infer_media_type()

    data_classes = {
        'text': TextData,
        'image': ImageData,
        'video': VideoData,
    }
    data_cls = data_classes.get(self.media_type)
    if data_cls is None:
        raise ValueError(
            f"Annotation types are only supported for {list(data_classes)} media types."
            f" Found {self.media_type} .")
    return data_cls(**data_kwargs)
193
+
194
+ def _infer_media_type (self ) -> str :
195
+ # Determines the data row type based on the label content
196
+ if isinstance (self .label , list ):
197
+ return 'video'
185
198
if self ._has_text_annotations ():
186
- # If it has text annotations then it must be text
187
- if self ._is_url ():
188
- return TextData (url = self .row_data , ** data_row_info )
189
- else :
190
- return TextData (text = self .row_data , ** data_row_info )
199
+ return 'text'
191
200
elif self ._has_object_annotations ():
192
- # If it has object annotations and none are text annotations then it must be an image
193
- if self ._is_url ():
194
- return ImageData (url = self .row_data , ** data_row_info )
195
- else :
196
- return ImageData (text = self .row_data , ** data_row_info )
201
+ return 'image'
197
202
else :
198
- # no annotations to infer data type from.
199
- # Use information from the row_data format if possible.
200
203
if self ._row_contains ((".jpg" , ".png" , ".jpeg" )) and self ._is_url ():
201
- return ImageData (url = self .row_data , ** data_row_info )
202
- elif self ._row_contains (
203
- (".txt" , ".text" , ".html" )) and self ._is_url ():
204
- return TextData (url = self .row_data , ** data_row_info )
205
- elif not self ._is_url ():
206
- return TextData (text = self .row_data , ** data_row_info )
204
+ return 'image'
205
+ elif (self ._row_contains ((".txt" , ".text" , ".html" )) and
206
+ self ._is_url ()) or not self ._is_url ():
207
+ return 'text'
207
208
else :
208
- # This is going to be urls that do not contain any file extensions
209
- # This will only occur on skipped images.
210
- # To use this converter on data with this url format
211
- # filter out empty examples from the payload before deserializing.
209
+ # This condition will occur when a data row url does not contain a file extension
210
+ # and the label does not contain object annotations that indicate the media type.
211
+ # As a temporary workaround you can explicitly set the media_type
212
+ # in each label json payload before converting.
213
+ # We will eventually provide the media type in the export.
212
214
raise TypeError (
213
- "Can't infer data type from row data. Remove empty examples before trying again. "
214
- f"row_data: { self . row_data [: 200 ] } " )
215
+ "Can't infer data type from row data. row_data: {self.row_data[:200]} "
216
+ )
215
217
216
218
def _has_object_annotations (self ):
217
219
return len (self .label .objects ) > 0
0 commit comments