Skip to content

Commit a3fa5af

Browse files
author
Matt Sokoloff
committed
add support for media types in label exports
1 parent 5f0d421 commit a3fa5af

File tree

5 files changed

+58
-53
lines changed

5 files changed

+58
-53
lines changed

labelbox/data/serialization/labelbox_v1/classification.py

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,25 @@
99

1010

1111
class LBV1ClassificationAnswer(LBV1Feature):
12+
1213
def to_common(self) -> ClassificationAnswer:
1314
return ClassificationAnswer(feature_schema_id=self.schema_id,
14-
name=self.title,
15-
keyframe = self.keyframe,
16-
extra={
17-
'feature_id': self.feature_id,
18-
'value': self.value
19-
})
15+
name=self.title,
16+
keyframe=self.keyframe,
17+
extra={
18+
'feature_id': self.feature_id,
19+
'value': self.value
20+
})
2021

2122
@classmethod
22-
def from_common(cls, answer: ClassificationAnnotation) -> "LBV1ClassificationAnswer":
23-
return cls(
24-
schema_id=answer.feature_schema_id,
25-
title=answer.name,
26-
value=answer.extra.get('value'),
27-
feature_id=answer.extra.get('feature_id'),
28-
keyframe=answer.keyframe
29-
)
23+
def from_common(
24+
cls,
25+
answer: ClassificationAnnotation) -> "LBV1ClassificationAnswer":
26+
return cls(schema_id=answer.feature_schema_id,
27+
title=answer.name,
28+
value=answer.extra.get('value'),
29+
feature_id=answer.extra.get('feature_id'),
30+
keyframe=answer.keyframe)
3031

3132

3233
class LBV1Radio(LBV1Feature):
@@ -47,9 +48,7 @@ class LBV1Checklist(LBV1Feature):
4748
answers: List[LBV1ClassificationAnswer]
4849

4950
def to_common(self) -> Checklist:
50-
return Checklist(answer=[
51-
answer.to_common() for answer in self.answers
52-
])
51+
return Checklist(answer=[answer.to_common() for answer in self.answers])
5352

5453
@classmethod
5554
def from_common(cls, checklist: Checklist, feature_schema_id: Cuid,
@@ -66,9 +65,7 @@ class LBV1Dropdown(LBV1Feature):
6665
answer: List[LBV1ClassificationAnswer]
6766

6867
def to_common(self) -> Dropdown:
69-
return Dropdown(answer=[
70-
answer.to_common() for answer in self.answer
71-
])
68+
return Dropdown(answer=[answer.to_common() for answer in self.answer])
7269

7370
@classmethod
7471
def from_common(cls, dropdown: Dropdown, feature_schema_id: Cuid,

labelbox/data/serialization/labelbox_v1/label.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -137,20 +137,17 @@ class LBV1Label(BaseModel):
137137
label_url: Optional[str] = Extra('View Label')
138138
has_open_issues: Optional[float] = Extra('Has Open Issues')
139139
skipped: Optional[bool] = Extra('Skipped')
140+
media_type: Optional[str] = Extra('media_type')
140141

141142
def to_common(self) -> Label:
142143
if isinstance(self.label, list):
143144
annotations = []
144145
for lbl in self.label:
145146
annotations.extend(lbl.to_common())
146-
data = VideoData(url=self.row_data,
147-
external_id=self.external_id,
148-
uid=self.data_row_id)
149147
else:
150148
annotations = self.label.to_common()
151-
data = self._infer_media_type()
152149

153-
return Label(data=data,
150+
return Label(data=self._data_row_to_common(),
154151
uid=self.id,
155152
annotations=annotations,
156153
extra={
@@ -174,44 +171,49 @@ def from_common(cls, label: Label):
174171
external_id=label.data.external_id,
175172
**label.extra)
176173

177-
def _infer_media_type(self):
178-
# Video annotations are formatted differently from text and images
179-
# So we only need to differentiate those two
174+
def _data_row_to_common(self) -> Union[ImageData, TextData, VideoData]:
175+
# Use data row information to construct the appropriate annotation type
180176
data_row_info = {
177+
'url' if self._is_url() else 'text': self.row_data,
181178
'external_id': self.external_id,
182179
'uid': self.data_row_id
183180
}
184181

182+
self.media_type = self.media_type or self._infer_media_type()
183+
media_mapping = {
184+
'text': TextData,
185+
'image': ImageData,
186+
'video': VideoData
187+
}
188+
if self.media_type not in media_mapping:
189+
raise ValueError(
190+
f"Annotation types are only supported for {list(media_mapping)} media types."
191+
f" Found {self.media_type}.")
192+
return media_mapping[self.media_type](**data_row_info)
193+
194+
def _infer_media_type(self) -> str:
195+
# Determines the data row type based on the label content
196+
if isinstance(self.label, list):
197+
return 'video'
185198
if self._has_text_annotations():
186-
# If it has text annotations then it must be text
187-
if self._is_url():
188-
return TextData(url=self.row_data, **data_row_info)
189-
else:
190-
return TextData(text=self.row_data, **data_row_info)
199+
return 'text'
191200
elif self._has_object_annotations():
192-
# If it has object annotations and none are text annotations then it must be an image
193-
if self._is_url():
194-
return ImageData(url=self.row_data, **data_row_info)
195-
else:
196-
return ImageData(text=self.row_data, **data_row_info)
201+
return 'image'
197202
else:
198-
# no annotations to infer data type from.
199-
# Use information from the row_data format if possible.
200203
if self._row_contains((".jpg", ".png", ".jpeg")) and self._is_url():
201-
return ImageData(url=self.row_data, **data_row_info)
202-
elif self._row_contains(
203-
(".txt", ".text", ".html")) and self._is_url():
204-
return TextData(url=self.row_data, **data_row_info)
205-
elif not self._is_url():
206-
return TextData(text=self.row_data, **data_row_info)
204+
return 'image'
205+
elif (self._row_contains((".txt", ".text", ".html")) and
206+
self._is_url()) or not self._is_url():
207+
return 'text'
207208
else:
208-
# This is going to be urls that do not contain any file extensions
209-
# This will only occur on skipped images.
210-
# To use this converter on data with this url format
211-
# filter out empty examples from the payload before deserializing.
209+
# This condition will occur when a data row url does not contain a file extension
210+
# and the label does not contain object annotations that indicate the media type.
211+
# As a temporary workaround you can explicitly set the media_type
212+
# in each label json payload before converting.
213+
# We will eventually provide the media type in the export.
212214
raise TypeError(
213-
"Can't infer data type from row data. Remove empty examples before trying again. "
214-
f"row_data: {self.row_data[:200]}")
215+
f"Can't infer data type from row data. row_data: {self.row_data[:200]}"
216+
)
215217

216218
def _has_object_annotations(self):
217219
return len(self.label.objects) > 0

tests/data/serialization/labelbox_v1/test_image.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@ def test_image(file_path):
1616
collection = LBV1Converter.deserialize([payload])
1717
serialized = next(LBV1Converter.serialize(collection))
1818

19+
# We are storing the media types now.
20+
payload['media_type'] = 'image'
21+
1922
assert serialized.keys() == payload.keys()
23+
2024
for key in serialized:
2125
if key != 'Label':
2226
assert serialized[key] == payload[key]

tests/data/serialization/labelbox_v1/test_text.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ def test_text():
99
collection = LBV1Converter.deserialize([payload])
1010
serialized = next(LBV1Converter.serialize(collection))
1111

12+
payload['media_type'] = 'text'
13+
1214
assert serialized.keys() == payload.keys()
1315
for key in serialized:
1416
if key != 'Label':

tests/data/serialization/labelbox_v1/test_video.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def test_video():
1717
open('tests/data/assets/labelbox_v1/video_export.json', 'r'))
1818
collection = LBV1Converter.deserialize([payload])
1919
serialized = next(LBV1Converter.serialize(collection))
20-
20+
payload['media_type'] = 'video'
2121
assert serialized.keys() == payload.keys()
2222
for key in serialized:
2323
if key != 'Label':

0 commit comments

Comments
 (0)