Skip to content

Commit fbf9bdb

Browse files
committed
Serialization util added
1 parent 2e515a4 commit fbf9bdb

File tree

4 files changed

+57
-17
lines changed

4 files changed

+57
-17
lines changed

nucleus/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -409,7 +409,9 @@ def preprocess_payload(batch):
409409
(ITEMS_KEY, (None, json.dumps(batch), "application/json"))
410410
]
411411
for item in batch:
412-
image = open(item.get(IMAGE_URL_KEY), "rb")
412+
image = open( # pylint: disable=R1732
413+
item.get(IMAGE_URL_KEY), "rb" # pylint: disable=R1732
414+
) # pylint: disable=R1732
413415
img_name = os.path.basename(image.name)
414416
img_type = (
415417
f"image/{os.path.splitext(image.name)[1].strip('.')}"

nucleus/annotation.py

Lines changed: 24 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,27 @@
1+
import json
12
from dataclasses import dataclass
23
from enum import Enum
3-
from typing import Dict, Optional, Any, Union, List
4+
from typing import Any, Dict, List, Optional, Union
5+
46
from .constants import (
57
ANNOTATION_ID_KEY,
8+
ANNOTATIONS_KEY,
9+
BOX_TYPE,
610
DATASET_ITEM_ID_KEY,
7-
REFERENCE_ID_KEY,
8-
METADATA_KEY,
9-
X_KEY,
10-
Y_KEY,
11-
WIDTH_KEY,
12-
HEIGHT_KEY,
1311
GEOMETRY_KEY,
14-
BOX_TYPE,
15-
POLYGON_TYPE,
12+
HEIGHT_KEY,
13+
INDEX_KEY,
14+
ITEM_ID_KEY,
1615
LABEL_KEY,
16+
MASK_URL_KEY,
17+
METADATA_KEY,
18+
POLYGON_TYPE,
19+
REFERENCE_ID_KEY,
1720
TYPE_KEY,
1821
VERTICES_KEY,
19-
ITEM_ID_KEY,
20-
MASK_URL_KEY,
21-
INDEX_KEY,
22-
ANNOTATIONS_KEY,
22+
WIDTH_KEY,
23+
X_KEY,
24+
Y_KEY,
2325
)
2426

2527

@@ -42,6 +44,15 @@ def from_json(cls, payload: dict):
4244
else:
4345
return SegmentationAnnotation.from_json(payload)
4446

47+
def to_payload(self):
    """Refuse to build a payload for the base annotation class.

    The base class has no payload of its own; the error message directs
    the caller to use a specific subclass instead.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError(
        "For serialization, use a specific subclass (i.e. SegmentationAnnotation), "
        "not the base annotation class."
    )
52+
53+
def to_json(self) -> str:
    """Return the result of ``self.to_payload()`` encoded as a JSON string.

    Any exception raised by ``to_payload()`` or by ``json.dumps`` (for
    example ``TypeError`` for a value that is not JSON-serializable)
    propagates to the caller unchanged.
    """
    return json.dumps(self.to_payload())
55+
4556

4657
@dataclass
4758
class Segment:

nucleus/dataset_item.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,14 @@
1-
from dataclasses import dataclass
1+
import json
22
import os.path
3+
from dataclasses import dataclass
34
from typing import Optional
5+
46
from .constants import (
7+
DATASET_ITEM_ID_KEY,
58
IMAGE_URL_KEY,
69
METADATA_KEY,
7-
REFERENCE_ID_KEY,
810
ORIGINAL_IMAGE_URL_KEY,
9-
DATASET_ITEM_ID_KEY,
11+
REFERENCE_ID_KEY,
1012
)
1113

1214

@@ -55,3 +57,6 @@ def to_payload(self) -> dict:
5557
if self.item_id:
5658
payload[DATASET_ITEM_ID_KEY] = self.item_id
5759
return payload
60+
61+
def to_json(self) -> str:
    """Return the dict built by ``self.to_payload()`` as a JSON string.

    Any exception raised by ``json.dumps`` (for example ``TypeError`` for
    a value that is not JSON-serializable) propagates to the caller
    unchanged.
    """
    return json.dumps(self.to_payload())

nucleus/utils.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,3 +70,25 @@ def format_dataset_item_response(response: dict) -> dict:
7070
ITEM_KEY: DatasetItem.from_json(item),
7171
ANNOTATIONS_KEY: annotation_response,
7272
}
73+
74+
75+
def serialize_and_write(
    upload_unit: List[Union[DatasetItem, Annotation]], file_pointer
):
    """Write the JSON form of every unit in ``upload_unit`` to ``file_pointer``.

    Each unit is serialized with its ``to_json()`` method and the resulting
    string is handed to ``file_pointer.write`` in iteration order.

    Args:
        upload_unit: Objects exposing a ``to_json()`` method.
        file_pointer: Object whose ``write`` method accepts the string
            returned by ``to_json()``.

    Raises:
        ValueError: If ``to_json()`` raises ``TypeError`` for a unit. The
            original ``TypeError`` is chained as the cause. Units written
            before the failing one stay written.
    """
    # NOTE(review): no separator is written between units, so the output is
    # back-to-back JSON documents -- confirm the consumer expects this rather
    # than one document per line.
    for unit in upload_unit:
        try:
            serialized = unit.to_json()
        except TypeError as e:
            type_name = type(unit).__name__
            message = (
                f"The following {type_name} could not be serialized: {unit}\n"
                "This is usually an issue with a custom python object being "
                "present in the metadata. Please inspect this error and adjust the "
                "metadata so it is json-serializable: only python primitives such as "
                "strings, ints, floats, lists, and dicts. For example, you must "
                "convert numpy arrays into list or lists of lists.\n"
                f"The specific error was {e}"
            )
            raise ValueError(message) from e
        # The write stays outside the try block so that a TypeError raised by
        # the file object itself is not reported as a metadata problem.
        file_pointer.write(serialized)

0 commit comments

Comments
 (0)