Skip to content

Commit 40f9531

Browse files
authored
Merge pull request #63 from scaleapi/da/validation_serialization
Da/validation serialization
2 parents 66d2b49 + c4815d3 commit 40f9531

File tree

4 files changed, with 93 additions and 16 deletions.

nucleus/annotation.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,27 @@
1+
import json
12
from dataclasses import dataclass
23
from enum import Enum
3-
from typing import Dict, Optional, Any, Union, List
4+
from typing import Any, Dict, List, Optional, Union
5+
46
from .constants import (
57
ANNOTATION_ID_KEY,
8+
ANNOTATIONS_KEY,
9+
BOX_TYPE,
610
DATASET_ITEM_ID_KEY,
7-
REFERENCE_ID_KEY,
8-
METADATA_KEY,
9-
X_KEY,
10-
Y_KEY,
11-
WIDTH_KEY,
12-
HEIGHT_KEY,
1311
GEOMETRY_KEY,
14-
BOX_TYPE,
15-
POLYGON_TYPE,
12+
HEIGHT_KEY,
13+
INDEX_KEY,
14+
ITEM_ID_KEY,
1615
LABEL_KEY,
16+
MASK_URL_KEY,
17+
METADATA_KEY,
18+
POLYGON_TYPE,
19+
REFERENCE_ID_KEY,
1720
TYPE_KEY,
1821
VERTICES_KEY,
19-
ITEM_ID_KEY,
20-
MASK_URL_KEY,
21-
INDEX_KEY,
22-
ANNOTATIONS_KEY,
22+
WIDTH_KEY,
23+
X_KEY,
24+
Y_KEY,
2325
)
2426

2527

@@ -42,6 +44,15 @@ def from_json(cls, payload: dict):
4244
else:
4345
return SegmentationAnnotation.from_json(payload)
4446

47+
def to_payload(self) -> dict:
    """Build the serializable payload for this annotation.

    The base annotation class has no payload of its own, so this
    implementation always fails and points the caller at a specific
    subclass instead.

    Raises:
        NotImplementedError: Always, when invoked on the base class.
    """
    # "e.g." rather than "i.e.": SegmentationAnnotation is one example of a
    # usable subclass, not the only one.
    raise NotImplementedError(
        "For serialization, use a specific subclass (e.g. SegmentationAnnotation), "
        "not the base annotation class."
    )
52+
53+
def to_json(self) -> str:
    """Return the result of ``to_payload()`` encoded as a JSON string."""
    payload = self.to_payload()
    return json.dumps(payload)
55+
4556

4657
@dataclass
4758
class Segment:

nucleus/dataset_item.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1-
from dataclasses import dataclass
1+
import json
22
import os.path
3+
from dataclasses import dataclass
34
from typing import Optional
5+
46
from .constants import (
7+
DATASET_ITEM_ID_KEY,
58
IMAGE_URL_KEY,
69
METADATA_KEY,
7-
REFERENCE_ID_KEY,
810
ORIGINAL_IMAGE_URL_KEY,
9-
DATASET_ITEM_ID_KEY,
11+
REFERENCE_ID_KEY,
1012
)
1113

1214

@@ -51,3 +53,6 @@ def to_payload(self) -> dict:
5153
if self.item_id:
5254
payload[DATASET_ITEM_ID_KEY] = self.item_id
5355
return payload
56+
57+
def to_json(self) -> str:
    """Serialize the dict produced by ``to_payload()`` into JSON text."""
    serialized = json.dumps(self.to_payload())
    return serialized

nucleus/utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,25 @@ def format_dataset_item_response(response: dict) -> dict:
7070
ITEM_KEY: DatasetItem.from_json(item),
7171
ANNOTATIONS_KEY: annotation_response,
7272
}
73+
74+
75+
def serialize_and_write(
    upload_unit: "List[Union[DatasetItem, Annotation]]", file_pointer
):
    """Write each upload unit to ``file_pointer`` as one JSON document per line.

    Args:
        upload_unit: The items or annotations to serialize. Each element must
            provide a ``to_json()`` method returning a string.
        file_pointer: A writable text file-like object (anything exposing
            ``write``).

    Raises:
        ValueError: If a unit's ``to_json()`` raises ``TypeError``. The
            message names the unit's type, includes the unit itself and the
            original error text; the ``TypeError`` is chained as the cause.
            Units preceding the failing one have already been written.
    """
    for unit in upload_unit:
        # Only the serialization step is guarded, so that a TypeError raised
        # by the file object itself is not misreported as a metadata problem.
        try:
            serialized = unit.to_json()
        except TypeError as e:
            type_name = type(unit).__name__
            message = (
                f"The following {type_name} could not be serialized: {unit}\n"
                "This is usually an issue with a custom python object being "
                "present in the metadata. Please inspect this error and adjust the "
                "metadata so it is json-serializable: only python primitives such as "
                "strings, ints, floats, lists, and dicts. For example, you must "
                "convert numpy arrays into list or lists of lists.\n"
                f"The specific error was {e}"
            )
            raise ValueError(message) from e
        # Newline-terminate every record; without a delimiter consecutive
        # documents run together and cannot be read back line by line.
        file_pointer.write(serialized + "\n")

tests/test_utils.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import pytest
2+
from nucleus import DatasetItem
3+
from nucleus import utils
4+
5+
import io
6+
7+
8+
class TestNonSerializableObject:
    """Arbitrary custom object that ``json.dumps`` cannot encode.

    Intended to be placed inside metadata to provoke a serialization error.
    """

    # The "Test" prefix matches pytest's default class-collection pattern;
    # opt out explicitly because this is a helper type, not a test case.
    __test__ = False

    @staticmethod
    def weird_function():
        """Print a fixed message; uses no instance state."""
        print("can't touch this. Dun dun dun dun.")
11+
12+
13+
def test_serialize():
    """Check the error raised for non-serializable metadata, then the fixed case.

    The first call must raise a ValueError whose text mentions the failing
    item (and not the valid one); after the bad metadata value is replaced,
    the same list must serialize without raising.
    """
    good_item = DatasetItem("fake_url1", "fake_id1")
    bad_item = DatasetItem(
        "fake_url2",
        "fake_id2",
        metadata={
            "ok": "field",
            "bad": TestNonSerializableObject(),
        },
    )
    test_items = [good_item, bad_item]

    with io.StringIO() as in_memory_filelike:
        with pytest.raises(ValueError) as error:
            utils.serialize_and_write(test_items, in_memory_filelike)

        error_text = str(error.value)
        assert "DatasetItem" in error_text
        assert "fake_id2" in error_text
        assert "fake_id1" not in error_text

        # Swap the offending value for a plain string and retry.
        bad_item.metadata["bad"] = "fixed"
        utils.serialize_and_write(test_items, in_memory_filelike)

0 commit comments

Comments
 (0)