Skip to content

Commit b814162

Browse files
authored
Merge pull request #345 from Labelbox/ms/video-classification-keyframe
video classification keyframe
2 parents d47e638 + e8649d9 commit b814162

File tree

10 files changed

+131
-80
lines changed

10 files changed

+131
-80
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ The package `rasterio` installed by `labelbox[data]` relies on GDAL which could
5757
You may see the following error message:
5858

5959
```
60-
INFO:root:Building on Windows requires extra options to setup.py to locate needed GDAL files. More information is available in the README.
60+
INFO:root:Building on Windows requires extra options to setup.py to locate needed GDAL files. More information is available in the README.
6161
62-
ERROR: A GDAL API version must be specified. Provide a path to gdal-config using a GDAL_CONFIG environment variable or use a GDAL_VERSION environment variable.
62+
ERROR: A GDAL API version must be specified. Provide a path to gdal-config using a GDAL_CONFIG environment variable or use a GDAL_VERSION environment variable.
6363
```
6464

6565
As a workaround:
@@ -72,7 +72,7 @@ As a workaround:
7272

7373
Note: You need to download the right files for your Python version. In the files above `cp38` means CPython 3.8.
7474

75-
2. After downloading the files, please run the following commands, in this particular order.
75+
2. After downloading the files, please run the following commands, in this particular order.
7676

7777
```
7878
pip install GDAL‑3.3.2‑cp38‑cp38‑win_amd64.wh

labelbox/data/annotation_types/classification/classification.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, Dict, List
1+
from typing import Any, Dict, List, Union, Optional
22

33
try:
44
from typing import Literal
@@ -24,13 +24,25 @@ class ClassificationAnswer(FeatureSchema):
2424
- Represents a classification option.
2525
- Because it inherits from FeatureSchema
2626
the option can be represented with either the name or feature_schema_id
27+
28+
- The key frame arg only applies to video classifications.
29+
Each answer can have a key frame independent of the others.
30+
So unlike object annotations, classification annotations
31+
track key frames at a classification answer level.
2732
"""
2833
extra: Dict[str, Any] = {}
34+
keyframe: Optional[bool] = None
35+
36+
def dict(self, *args, **kwargs):
37+
res = super().dict(*args, **kwargs)
38+
if res['keyframe'] is None:
39+
res.pop('keyframe')
40+
return res
2941

3042

3143
class Radio(BaseModel):
3244
""" A classification with only one selected option allowed
33-
45+
3446
>>> Radio(answer = ClassificationAnswer(name = "dog"))
3547
3648
"""
@@ -50,7 +62,7 @@ class Checklist(_TempName):
5062
class Text(BaseModel):
5163
""" Free form text
5264
53-
>>> Text(answer = "some text answer")
65+
>>> Text(answer = "some text answer")
5466
5567
"""
5668
answer: str

labelbox/data/serialization/labelbox_v1/classification.py

Lines changed: 25 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,56 +9,53 @@
99

1010

1111
class LBV1ClassificationAnswer(LBV1Feature):
12-
...
12+
13+
def to_common(self) -> ClassificationAnswer:
14+
return ClassificationAnswer(feature_schema_id=self.schema_id,
15+
name=self.title,
16+
keyframe=self.keyframe,
17+
extra={
18+
'feature_id': self.feature_id,
19+
'value': self.value
20+
})
21+
22+
@classmethod
23+
def from_common(
24+
cls,
25+
answer: ClassificationAnnotation) -> "LBV1ClassificationAnswer":
26+
return cls(schema_id=answer.feature_schema_id,
27+
title=answer.name,
28+
value=answer.extra.get('value'),
29+
feature_id=answer.extra.get('feature_id'),
30+
keyframe=answer.keyframe)
1331

1432

1533
class LBV1Radio(LBV1Feature):
1634
answer: LBV1ClassificationAnswer
1735

1836
def to_common(self) -> Radio:
19-
return Radio(answer=ClassificationAnswer(
20-
feature_schema_id=self.answer.schema_id,
21-
name=self.answer.title,
22-
extra={
23-
'feature_id': self.answer.feature_id,
24-
'value': self.answer.value
25-
}))
37+
return Radio(answer=self.answer.to_common())
2638

2739
@classmethod
2840
def from_common(cls, radio: Radio, feature_schema_id: Cuid,
2941
**extra) -> "LBV1Radio":
3042
return cls(schema_id=feature_schema_id,
31-
answer=LBV1ClassificationAnswer(
32-
schema_id=radio.answer.feature_schema_id,
33-
title=radio.answer.name,
34-
value=radio.answer.extra.get('value'),
35-
feature_id=radio.answer.extra.get('feature_id')),
43+
answer=LBV1ClassificationAnswer.from_common(radio.answer),
3644
**extra)
3745

3846

3947
class LBV1Checklist(LBV1Feature):
4048
answers: List[LBV1ClassificationAnswer]
4149

4250
def to_common(self) -> Checklist:
43-
return Checklist(answer=[
44-
ClassificationAnswer(feature_schema_id=answer.schema_id,
45-
name=answer.title,
46-
extra={
47-
'feature_id': answer.feature_id,
48-
'value': answer.value
49-
}) for answer in self.answers
50-
])
51+
return Checklist(answer=[answer.to_common() for answer in self.answers])
5152

5253
@classmethod
5354
def from_common(cls, checklist: Checklist, feature_schema_id: Cuid,
5455
**extra) -> "LBV1Checklist":
5556
return cls(schema_id=feature_schema_id,
5657
answers=[
57-
LBV1ClassificationAnswer(
58-
schema_id=answer.feature_schema_id,
59-
title=answer.name,
60-
value=answer.extra.get('value'),
61-
feature_id=answer.extra.get('feature_id'))
58+
LBV1ClassificationAnswer.from_common(answer)
6259
for answer in checklist.answer
6360
],
6461
**extra)
@@ -68,25 +65,14 @@ class LBV1Dropdown(LBV1Feature):
6865
answer: List[LBV1ClassificationAnswer]
6966

7067
def to_common(self) -> Dropdown:
71-
return Dropdown(answer=[
72-
ClassificationAnswer(feature_schema_id=answer.schema_id,
73-
name=answer.title,
74-
extra={
75-
'feature_id': answer.feature_id,
76-
'value': answer.value
77-
}) for answer in self.answer
78-
])
68+
return Dropdown(answer=[answer.to_common() for answer in self.answer])
7969

8070
@classmethod
8171
def from_common(cls, dropdown: Dropdown, feature_schema_id: Cuid,
8272
**extra) -> "LBV1Dropdown":
8373
return cls(schema_id=feature_schema_id,
8474
answer=[
85-
LBV1ClassificationAnswer(
86-
schema_id=answer.feature_schema_id,
87-
title=answer.name,
88-
value=answer.extra.get('value'),
89-
feature_id=answer.extra.get('feature_id'))
75+
LBV1ClassificationAnswer.from_common(answer)
9076
for answer in dropdown.answer
9177
],
9278
**extra)

labelbox/data/serialization/labelbox_v1/label.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -137,20 +137,17 @@ class LBV1Label(BaseModel):
137137
label_url: Optional[str] = Extra('View Label')
138138
has_open_issues: Optional[float] = Extra('Has Open Issues')
139139
skipped: Optional[bool] = Extra('Skipped')
140+
media_type: Optional[str] = Extra('media_type')
140141

141142
def to_common(self) -> Label:
142143
if isinstance(self.label, list):
143144
annotations = []
144145
for lbl in self.label:
145146
annotations.extend(lbl.to_common())
146-
data = VideoData(url=self.row_data,
147-
external_id=self.external_id,
148-
uid=self.data_row_id)
149147
else:
150148
annotations = self.label.to_common()
151-
data = self._infer_media_type()
152149

153-
return Label(data=data,
150+
return Label(data=self._data_row_to_common(),
154151
uid=self.id,
155152
annotations=annotations,
156153
extra={
@@ -174,44 +171,49 @@ def from_common(cls, label: Label):
174171
external_id=label.data.external_id,
175172
**label.extra)
176173

177-
def _infer_media_type(self):
178-
# Video annotations are formatted differently from text and images
179-
# So we only need to differentiate those two
174+
def _data_row_to_common(self) -> Union[ImageData, TextData, VideoData]:
175+
# Use data row information to construct the appropriate annotation type
180176
data_row_info = {
177+
'url' if self._is_url() else 'text': self.row_data,
181178
'external_id': self.external_id,
182179
'uid': self.data_row_id
183180
}
184181

182+
self.media_type = self.media_type or self._infer_media_type()
183+
media_mapping = {
184+
'text': TextData,
185+
'image': ImageData,
186+
'video': VideoData
187+
}
188+
if self.media_type not in media_mapping:
189+
raise ValueError(
190+
f"Annotation types are only supported for {list(media_mapping)} media types."
191+
f" Found {self.media_type}.")
192+
return media_mapping[self.media_type](**data_row_info)
193+
194+
def _infer_media_type(self) -> str:
195+
# Determines the data row type based on the label content
196+
if isinstance(self.label, list):
197+
return 'video'
185198
if self._has_text_annotations():
186-
# If it has text annotations then it must be text
187-
if self._is_url():
188-
return TextData(url=self.row_data, **data_row_info)
189-
else:
190-
return TextData(text=self.row_data, **data_row_info)
199+
return 'text'
191200
elif self._has_object_annotations():
192-
# If it has object annotations and none are text annotations then it must be an image
193-
if self._is_url():
194-
return ImageData(url=self.row_data, **data_row_info)
195-
else:
196-
return ImageData(text=self.row_data, **data_row_info)
201+
return 'image'
197202
else:
198-
# no annotations to infer data type from.
199-
# Use information from the row_data format if possible.
200203
if self._row_contains((".jpg", ".png", ".jpeg")) and self._is_url():
201-
return ImageData(url=self.row_data, **data_row_info)
202-
elif self._row_contains(
203-
(".txt", ".text", ".html")) and self._is_url():
204-
return TextData(url=self.row_data, **data_row_info)
205-
elif not self._is_url():
206-
return TextData(text=self.row_data, **data_row_info)
204+
return 'image'
205+
elif (self._row_contains((".txt", ".text", ".html")) and
206+
self._is_url()) or not self._is_url():
207+
return 'text'
207208
else:
208-
# This is going to be urls that do not contain any file extensions
209-
# This will only occur on skipped images.
210-
# To use this converter on data with this url format
211-
# filter out empty examples from the payload before deserializing.
209+
# This condition will occur when a data row url does not contain a file extension
210+
# and the label does not contain object annotations that indicate the media type.
211+
# As a temporary workaround you can explicitly set the media_type
212+
# in each label json payload before converting.
213+
# We will eventually provide the media type in the export.
212214
raise TypeError(
213-
"Can't infer data type from row data. Remove empty examples before trying again. "
214-
f"row_data: {self.row_data[:200]}")
215+
f"Can't infer data type from row data. row_data: {self.row_data[:200]}"
216+
)
215217

216218
def _has_object_annotations(self):
217219
return len(self.label.objects) > 0

labelbox/schema/model_run.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def model_run_data_rows(self):
145145
['annotationGroups', 'pageInfo', 'endCursor'])
146146

147147
def annotation_groups(self):
148-
""" `ModelRun.annotation_groups is being deprecated after version 3.9
148+
""" `ModelRun.annotation_groups is being deprecated after version 3.9
149149
in favor of ModelRun.model_run_data_rows`
150150
"""
151151
warnings.warn(
@@ -184,7 +184,7 @@ def delete_model_run_data_rows(self, data_row_ids):
184184
})
185185

186186
def delete_annotation_groups(self, data_row_ids):
187-
""" `ModelRun.delete_annotation_groups is being deprecated after version 3.9
187+
""" `ModelRun.delete_annotation_groups is being deprecated after version 3.9
188188
in favor of ModelRun.delete_model_run_data_rows`
189189
"""
190190
warnings.warn(
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"ID": "ckw3ce1mc78b50zc30dqf0qhj", "DataRow ID": "ckw3cctc41uqg0zo5023e59hn", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F8821d3e2-9059-b616-9d4a-9723da3ea073-im1?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=FPOQz-alx3gHMK30ib1iPqJj0W0", "Label": {"objects": [{"featureId": "ckw3ce58u00003e66w9rh0onm", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 99.405, "y": 56.15}, {"x": 111.421, "y": 99.129}, {"x": 146.082, "y": 80.413}, {"x": 118.815, "y": 47.369}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3ce58u00003e66w9rh0onm?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:56.000Z", "Updated At": "2021-11-17T09:48:56.305Z", "Seconds to Label": 2.239, "External ID": "im1", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1mc78b50zc30dqf0qhj", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cea3f7b9t0zbn2tgp2y83", "DataRow ID": "ckw3cctc41uqo0zo5gpma1mr2", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F1bc65970-9880-78b4-d298-7a7ef7f8f3fc-im3?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=GZUsyQqYYlQPWBYv7GApFYlHXAc", "Label": {"objects": [], "classifications": [{"featureId": "ckw3ced5e00023e66236meh70", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ced5e00013e6652355ejd", "schemaId": "ckw3cdy207b6w0zbn2hgp3321", "title": "op1", "value": 
"op_1"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:02.000Z", "Updated At": "2021-11-17T09:49:02.220Z", "Seconds to Label": 5.373, "External ID": "im3", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cea3f7b9t0zbn2tgp2y83", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cec4v78ex0zc3aodwdekw", "DataRow ID": "ckw3cctc41uqs0zo52cy6eus1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2Fdb42c0e8-e005-3305-ed35-b021f109b6a7-im4?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=Hms9mqcIyWNDzoJUdvMa6_hRKY4", "Label": {"objects": [{"featureId": "ckw3cefl900033e66k41q6zpc", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 69.58, "y": 42.292}, {"x": 64.932, "y": 74.128}, {"x": 91.888, "y": 64.601}, {"x": 86.775, "y": 41.828}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3cefl900033e66k41q6zpc?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [{"featureId": "ckw3ceijf00053e669zaplftd", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ceijf00043e665ex22xkp", "schemaId": "ckw3cdy207b6y0zbn77201rux", "title": "op2", "value": "op_2"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:15.000Z", "Updated At": "2021-11-17T09:49:15.785Z", "Seconds to Label": 5, "External ID": "im4", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], 
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cec4v78ex0zc3aodwdekw", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3ce1s34c1i0zbp32067q4v", "DataRow ID": "ckw3cctc41uqk0zo52n31egs1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F402cbd62-9127-5b50-57d6-d77aaf89f643-im2?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=QIwHFUXN1mjBn8K4ZLWVQGQekmE", "Label": {}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:59.000Z", "Updated At": "2021-11-17T09:49:02.000Z", "Seconds to Label": 3.524, "External ID": "im2", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1s34c1i0zbp32067q4v", "Has Open Issues": 0, "Skipped": true}]

tests/data/serialization/labelbox_v1/test_image.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@ def test_image(file_path):
2020
collection = LBV1Converter.deserialize([payload])
2121
serialized = next(LBV1Converter.serialize(collection))
2222

23+
# We are storing the media types now.
24+
payload['media_type'] = 'image'
25+
2326
assert serialized.keys() == payload.keys()
27+
2428
for key in serialized:
2529
if key != 'Label':
2630
assert serialized[key] == payload[key]

tests/data/serialization/labelbox_v1/test_text.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ def test_text():
99
collection = LBV1Converter.deserialize([payload])
1010
serialized = next(LBV1Converter.serialize(collection))
1111

12+
payload['media_type'] = 'text'
13+
1214
assert serialized.keys() == payload.keys()
1315
for key in serialized:
1416
if key != 'Label':
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import json
2+
3+
import pytest
4+
5+
from labelbox.data.serialization.labelbox_v1.converter import LBV1Converter
6+
7+
8+
def test_image():
9+
file_path = 'tests/data/assets/labelbox_v1/unkown_media_type_export.json'
10+
with open(file_path, 'r') as file:
11+
payload = json.load(file)
12+
13+
collection = list(LBV1Converter.deserialize(payload))
14+
# One of the data rows is broken.
15+
assert len(collection) != len(payload)
16+
17+
for row in payload:
18+
row['media_type'] = 'image'
19+
20+
collection = LBV1Converter.deserialize(payload)
21+
for idx, serialized in enumerate(LBV1Converter.serialize(collection)):
22+
assert serialized.keys() == payload[idx].keys()
23+
for key in serialized:
24+
if key != 'Label':
25+
assert serialized[key] == payload[idx][key]
26+
elif key == 'Label':
27+
for annotation_a, annotation_b in zip(
28+
serialized[key]['objects'],
29+
payload[idx][key]['objects']):
30+
if not len(annotation_a['classifications']):
31+
# We don't add a classification key to the payload if there are no classifications.
32+
annotation_a.pop('classifications')
33+
34+
if isinstance(annotation_b.get('classifications'),
35+
list) and len(
36+
annotation_b['classifications']):
37+
if isinstance(annotation_b['classifications'][0], list):
38+
annotation_b['classifications'] = annotation_b[
39+
'classifications'][0]
40+
41+
assert annotation_a == annotation_b
42+
43+
44+
# TODO: After this, check the ND serializer against this payload. It should work for almost everything (except the other known problem cases).

0 commit comments

Comments
 (0)