Skip to content

Commit 7e7bb42

Browse files
committed
everything works
1 parent 3033275 commit 7e7bb42

File tree

5 files changed

+125
-8
lines changed

5 files changed

+125
-8
lines changed

nucleus/__init__.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969

7070
from .dataset import Dataset
7171
from .dataset_item import DatasetItem
72-
from .annotation import BoxAnnotation, PolygonAnnotation
72+
from .annotation import BoxAnnotation, PolygonAnnotation, SegmentationAnnotation
7373
from .prediction import BoxPrediction, PolygonPrediction
7474
from .model_run import ModelRun
7575
from .slice import Slice
@@ -464,7 +464,7 @@ def exception_handler(request, exception):
464464
def annotate_dataset(
465465
self,
466466
dataset_id: str,
467-
annotations: List[Union[BoxAnnotation, PolygonAnnotation]],
467+
annotations: List[Union[BoxAnnotation, PolygonAnnotation, SegmentationAnnotation]],
468468
update: bool,
469469
batch_size: int = 100,
470470
):
@@ -476,9 +476,18 @@ def annotate_dataset(
476476
:return: {"dataset_id: str, "annotations_processed": int}
477477
"""
478478

479+
# Split payload into segmentations and Box/Polygon
480+
segmentations = [ann for ann in annotations if isinstance(ann, SegmentationAnnotation)]
481+
other_annotations = [ann for ann in annotations if not isinstance(ann, SegmentationAnnotation)]
482+
479483
batches = [
480-
annotations[i : i + batch_size]
481-
for i in range(0, len(annotations), batch_size)
484+
other_annotations[i : i + batch_size]
485+
for i in range(0, len(other_annotations), batch_size)
486+
]
487+
488+
semseg_batches = [
489+
segmentations[i : i + batch_size]
490+
for i in range(0, len(segmentations), batch_size)
482491
]
483492

484493
agg_response = {
@@ -504,6 +513,23 @@ def annotate_dataset(
504513
ANNOTATIONS_IGNORED_KEY
505514
]
506515

516+
for s_batch in semseg_batches:
517+
payload = {"segmentations": [seg.to_payload() for seg in s_batch]}
518+
if update:
519+
payload["force"] = update
520+
response = self._make_request(
521+
payload, f"dataset/{dataset_id}/annotate_segmentation"
522+
)
523+
if STATUS_CODE_KEY in response:
524+
agg_response[ERRORS_KEY] = response
525+
else:
526+
agg_response[ANNOTATIONS_PROCESSED_KEY] += response[
527+
ANNOTATIONS_PROCESSED_KEY
528+
]
529+
agg_response[ANNOTATIONS_IGNORED_KEY] += response[
530+
ANNOTATIONS_IGNORED_KEY
531+
]
532+
507533
return agg_response
508534

509535
def ingest_tasks(self, dataset_id: str, payload: dict):

nucleus/annotation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,15 +75,15 @@ def __str__(self):
7575
def from_json(cls, payload: dict):
7676
return cls(
7777
mask_url=payload[MASK_URL_KEY],
78-
annotations=payload[ANNOTATIONS_KEY],
78+
annotations=[Segment.from_json(ann) for ann in payload.get(ANNOTATIONS_KEY, [])],
7979
reference_id=payload.get(REFERENCE_ID_KEY, None),
8080
item_id=payload.get(ITEM_ID_KEY, None),
8181
)
8282

8383
def to_payload(self) -> dict:
8484
payload = {
8585
MASK_URL_KEY: self.mask_url,
86-
ANNOTATIONS_KEY: [ann.to_payload for ann in self.annotations],
86+
ANNOTATIONS_KEY: [ann.to_payload() for ann in self.annotations],
8787
}
8888
if self.reference_id:
8989
payload[REFERENCE_ID_KEY] = self.reference_id

nucleus/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,4 @@
5050
GEOMETRY_KEY = "geometry"
5151
AUTOTAGS_KEY = "autotags"
5252
MASK_URL_KEY = "mask_url"
53-
INDEX_KEY = "index_key"
53+
INDEX_KEY = "index"

tests/helpers.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
from pathlib import Path
2+
from urllib.parse import urlparse
3+
import boto3
4+
5+
PRESIGN_EXPIRY_SECONDS = 60*60*24*2 #2 days
26

37
TEST_MODEL_NAME = '[PyTest] Test Model'
48
TEST_MODEL_REFERENCE = '[PyTest] Test Model Reference'
@@ -13,6 +17,27 @@
1317
's3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/89b42832-10d662f4.jpg',
1418
]
1519

20+
def get_signed_url(url):
    """Return a presigned S3 GET URL for the object that ``url`` points to.

    The URL is split into bucket and key by ``get_s3_details`` and the pair
    is handed to ``s3_sign``.
    """
    return s3_sign(*get_s3_details(url))
23+
24+
def get_s3_details(url):
    """Split an S3 URL into its ``(bucket, key)`` pair.

    Accepts virtual-hosted URLs (``https://<BUCKET>.s3.amazonaws.com/<KEY>``)
    as well as ``s3://<BUCKET>/<KEY>`` URIs.

    :param url: URL of the S3 object.
    :return: tuple ``(bucket, key)``; ``key`` has no leading slash.
    """
    parsed = urlparse(url)
    if parsed.scheme == "s3":
        # In an s3:// URI the whole authority is the bucket name, dots included.
        bucket = parsed.netloc
    else:
        # partition() keeps the whole host when it contains no "."; slicing
        # with netloc.find(".") used -1 there and dropped the last character.
        bucket = parsed.netloc.partition(".")[0]
    # Drop the leading "/" of the path to obtain the object key.
    return bucket, parsed.path[1:]
29+
30+
def s3_sign(bucket, key):
    """Generate a presigned ``get_object`` URL for ``key`` in ``bucket``.

    The URL is created with an expiry of ``PRESIGN_EXPIRY_SECONDS``.
    """
    object_params = {"Bucket": bucket, "Key": key}
    client = boto3.client("s3")
    return client.generate_presigned_url(
        ClientMethod="get_object",
        Params=object_params,
        ExpiresIn=PRESIGN_EXPIRY_SECONDS,
    )
40+
1641
def reference_id_from_url(url):
    """Return the final path component of ``url``, used as the reference id."""
    file_name = Path(url).name
    return file_name
1843

@@ -45,6 +70,18 @@ def reference_id_from_url(url):
4570
for i in range(len(TEST_IMG_URLS))
4671
]
4772

73+
TEST_MASK_URL = "https://scale-temp.s3.amazonaws.com/scale-select/nucleus/mscoco_semseg_masks_uint8/000000000285.png"
# Every fixture shares the same mask, so presign it once instead of once per image.
_SIGNED_TEST_MASK_URL = get_signed_url(TEST_MASK_URL)
# One segmentation payload per test image, keyed by the image's reference id.
TEST_SEGMENTATION_ANNOTATIONS = [
    {
        "reference_id": reference_id_from_url(img_url),
        "mask_url": _SIGNED_TEST_MASK_URL,
        "annotations": [
            {"label": "bear", "index": 2},
            {"label": "grass-merged", "index": 1},
        ],
    }
    for img_url in TEST_IMG_URLS
]
84+
4885
TEST_BOX_PREDICTIONS = [
4986
{
5087
**TEST_BOX_ANNOTATIONS[i],

tests/test_annotation.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55
TEST_IMG_URLS,
66
TEST_BOX_ANNOTATIONS,
77
TEST_POLYGON_ANNOTATIONS,
8+
TEST_SEGMENTATION_ANNOTATIONS,
89
reference_id_from_url,
910
assert_box_annotation_matches_dict,
1011
assert_polygon_annotation_matches_dict,
1112
)
1213

13-
from nucleus import BoxAnnotation, PolygonAnnotation, DatasetItem
14+
from nucleus import BoxAnnotation, PolygonAnnotation, SegmentationAnnotation, DatasetItem
1415
from nucleus.constants import ERROR_PAYLOAD
1516

1617
@pytest.fixture()
@@ -58,6 +59,59 @@ def test_polygon_gt_upload(dataset):
5859
response_annotation = response[0]
5960
assert_polygon_annotation_matches_dict(response_annotation, TEST_POLYGON_ANNOTATIONS[0])
6061

62+
def test_single_semseg_gt_upload(dataset):
    """Uploading one segmentation annotation reports it as processed."""
    annotation = SegmentationAnnotation.from_json(TEST_SEGMENTATION_ANNOTATIONS[0])
    response = dataset.annotate(annotations=[annotation])
    assert response['dataset_id'] == dataset.id
    assert response['annotations_processed'] == 1
    assert response['annotations_ignored'] == 0
69+
70+
def test_batch_semseg_gt_upload(dataset):
    """Uploading all segmentation fixtures reports every one as processed."""
    annotations = [
        SegmentationAnnotation.from_json(ann)
        for ann in TEST_SEGMENTATION_ANNOTATIONS
    ]
    # Derive the expected count from the fixtures so the test tracks their size.
    expected = len(annotations)
    response = dataset.annotate(annotations=annotations)
    assert response['dataset_id'] == dataset.id
    assert response['annotations_processed'] == expected
    assert response['annotations_ignored'] == 0
76+
77+
def test_batch_semseg_gt_upload_ignore(dataset):
    """Re-uploading segmentations without ``update`` ignores all of them."""
    annotations = [
        SegmentationAnnotation.from_json(ann)
        for ann in TEST_SEGMENTATION_ANNOTATIONS
    ]
    # Derive the expected count from the fixtures so the test tracks their size.
    expected = len(annotations)

    # First upload: everything is new, so everything is processed.
    response = dataset.annotate(annotations=annotations)
    assert response['dataset_id'] == dataset.id
    assert response['annotations_processed'] == expected
    assert response['annotations_ignored'] == 0

    # Second upload of the same annotations: expect them all to be ignored.
    response = dataset.annotate(annotations=annotations)
    assert response['dataset_id'] == dataset.id
    assert response['annotations_processed'] == 0
    assert response['annotations_ignored'] == expected
90+
91+
def test_batch_semseg_gt_upload_update(dataset):
    """Re-uploading segmentations with ``update=True`` processes them again."""
    annotations = [
        SegmentationAnnotation.from_json(ann)
        for ann in TEST_SEGMENTATION_ANNOTATIONS
    ]
    # Derive the expected count from the fixtures so the test tracks their size.
    expected = len(annotations)

    # First upload: everything is new, so everything is processed.
    response = dataset.annotate(annotations=annotations)
    assert response['dataset_id'] == dataset.id
    assert response['annotations_processed'] == expected
    assert response['annotations_ignored'] == 0

    # Re-upload with update=True: expect all of them to be processed again
    # (overwritten), with none ignored.
    response = dataset.annotate(annotations=annotations, update=True)
    assert response['dataset_id'] == dataset.id
    assert response['annotations_processed'] == expected
    assert response['annotations_ignored'] == 0
104+
105+
106+
def test_mixed_annotation_upload(dataset):
    """Box and segmentation annotations can be uploaded in a single call."""
    semseg_annotations = [
        SegmentationAnnotation.from_json(ann)
        for ann in TEST_SEGMENTATION_ANNOTATIONS
    ]
    bbox_annotations = [BoxAnnotation(**ann) for ann in TEST_BOX_ANNOTATIONS]
    # Mix both kinds in one payload; the response should count both kinds.
    annotations = bbox_annotations + semseg_annotations
    response = dataset.annotate(annotations=annotations)
    assert response['dataset_id'] == dataset.id
    assert response['annotations_processed'] == len(annotations)
    assert response['annotations_ignored'] == 0
61115

62116
def test_box_gt_upload_update(dataset):
63117
annotation = BoxAnnotation(**TEST_BOX_ANNOTATIONS[0])

0 commit comments

Comments
 (0)