Skip to content

Commit da7e7f8

Browse files
authored
Merge pull request #30 from scaleapi/jihan-idempotency
Adds annotation_id as an optional parameter to the annotation and prediction classes. This will be used to uniquely identify each annotation/prediction within its dataset item. Also adds update as an optional boolean parameter when uploading predictions and annotations. With update=False (the default), annotations are not uploaded to dataset images that already have an annotation with the same annotation_id. With update=True, such existing annotations are overwritten.
2 parents 7601c29 + 2615d43 commit da7e7f8

13 files changed

+507
-74
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ item = dataset.loc("dataset_item_id")
9292
Upload groundtruth annotations for the items in your dataset.
9393
BoxAnnotation has the same format as https://dashboard.scale.com/nucleus/docs/api#add-ground-truth
9494
```python
95-
annotation_1 = BoxAnnotation(reference_id="1", label="label", x=0, y=0, width=10, height=10, metadata={})
96-
annotation_2 = BoxAnnotation(reference_id="2", label="label", x=0, y=0, width=10, height=10, metadata={})
95+
annotation_1 = BoxAnnotation(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_1", metadata={})
96+
annotation_2 = BoxAnnotation(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_2", metadata={})
9797
response = dataset.annotate([annotation_1, annotation_2])
9898
```
9999

@@ -113,8 +113,8 @@ Returns the associated model_id, human-readable name of the run, status, and use
113113
Takes a list of BoxPredictions within the payload, where BoxPrediction
114114
is formulated as in https://dashboard.scale.com/nucleus/docs/api#upload-model-outputs
115115
```python
116-
prediction_1 = BoxPrediction(reference_id="1", label="label", x=0, y=0, width=10, height=10, confidence=0.9)
117-
prediction_2 = BoxPrediction(reference_id="2", label="label", x=0, y=0, width=10, height=10, confidence=0.2)
116+
prediction_1 = BoxPrediction(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_1", confidence=0.9)
117+
prediction_2 = BoxPrediction(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_2", confidence=0.2)
118118

119119
model_run = model.create_run(name="My Model Run", metadata={"timestamp": "121012401"}, dataset=dataset, predictions=[prediction_1, prediction_2])
120120
```

nucleus/__init__.py

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
from typing import List, Union, Dict, Callable, Any, Optional
5959

6060
import tqdm
61-
import tqdm.notebook
61+
import tqdm.notebook as tqdm_notebook
6262

6363
import grequests
6464
import requests
@@ -76,7 +76,7 @@
7676
from .upload_response import UploadResponse
7777
from .payload_constructor import (
7878
construct_append_payload,
79-
construct_box_annotation_payload,
79+
construct_annotation_payload,
8080
construct_model_creation_payload,
8181
construct_box_predictions_payload,
8282
)
@@ -94,7 +94,9 @@
9494
DATASET_ITEM_ID_KEY,
9595
SLICE_ID_KEY,
9696
ANNOTATIONS_PROCESSED_KEY,
97+
ANNOTATIONS_IGNORED_KEY,
9798
PREDICTIONS_PROCESSED_KEY,
99+
PREDICTIONS_IGNORED_KEY,
98100
STATUS_CODE_KEY,
99101
SUCCESS_STATUS_CODES,
100102
DATASET_NAME_KEY,
@@ -129,7 +131,7 @@ def __init__(self, api_key: str, use_notebook: bool = False):
129131
self.api_key = api_key
130132
self.tqdm_bar = tqdm.tqdm
131133
if use_notebook:
132-
self.tqdm_bar = tqdm.notebook.tqdm
134+
self.tqdm_bar = tqdm_notebook.tqdm
133135

134136
def list_models(self) -> List[str]:
135137
"""
@@ -462,13 +464,15 @@ def exception_handler(request, exception):
462464
def annotate_dataset(
463465
self,
464466
dataset_id: str,
465-
annotations: List[DatasetItem],
467+
annotations: List[Union[BoxAnnotation, PolygonAnnotation]],
468+
update: bool,
466469
batch_size: int = 100,
467470
):
468471
"""
469472
Uploads ground truth annotations for a given dataset.
470473
:param dataset_id: id of the dataset
471-
:param annotations: List[DatasetItem]
474+
:param annotations: List[Union[BoxAnnotation, PolygonAnnotation]]
475+
:param update: whether to update or ignore conflicting annotations
472476
:return: {"dataset_id": str, "annotations_processed": int, "annotations_ignored": int}
473477
"""
474478

@@ -480,12 +484,13 @@ def annotate_dataset(
480484
agg_response = {
481485
DATASET_ID_KEY: dataset_id,
482486
ANNOTATIONS_PROCESSED_KEY: 0,
487+
ANNOTATIONS_IGNORED_KEY: 0,
483488
}
484489

485490
tqdm_batches = self.tqdm_bar(batches)
486491

487492
for batch in tqdm_batches:
488-
payload = construct_box_annotation_payload(batch)
493+
payload = construct_annotation_payload(batch, update)
489494
response = self._make_request(
490495
payload, f"dataset/{dataset_id}/annotate"
491496
)
@@ -495,6 +500,9 @@ def annotate_dataset(
495500
agg_response[ANNOTATIONS_PROCESSED_KEY] += response[
496501
ANNOTATIONS_PROCESSED_KEY
497502
]
503+
agg_response[ANNOTATIONS_IGNORED_KEY] += response[
504+
ANNOTATIONS_IGNORED_KEY
505+
]
498506

499507
return agg_response
500508

@@ -570,39 +578,40 @@ def create_model_run(self, dataset_id: str, payload: dict) -> ModelRun:
570578
def predict(
571579
self,
572580
model_run_id: str,
573-
payload: Dict[str, List[Union[BoxPrediction, PolygonPrediction]]],
581+
annotations: List[Union[BoxPrediction, PolygonPrediction]],
582+
update: bool,
574583
batch_size: int = 100,
575584
):
576585
"""
577586
Uploads model outputs as predictions for a model_run. Returns info about the upload.
578-
:param payload:
579-
{
580-
"annotations": List[Box2DPrediction],
581-
}
587+
:param annotations: List[Union[BoxPrediction, PolygonPrediction]],
588+
:param update: whether to update or ignore conflicting predictions
582589
:return:
583590
{
584591
"dataset_id": str,
585592
"model_run_id": str,
586-
"annotations_processed: int,
593+
"predictions_processed": int,
594+
"predictions_ignored": int,
587595
}
588596
"""
589-
predictions: List[Union[BoxPrediction, PolygonPrediction]] = payload[
590-
ANNOTATIONS_KEY
591-
]
592597
batches = [
593-
predictions[i : i + batch_size]
594-
for i in range(0, len(predictions), batch_size)
598+
annotations[i : i + batch_size]
599+
for i in range(0, len(annotations), batch_size)
595600
]
596601

597602
agg_response = {
598603
MODEL_RUN_ID_KEY: model_run_id,
599604
PREDICTIONS_PROCESSED_KEY: 0,
605+
PREDICTIONS_IGNORED_KEY: 0,
600606
}
601607

602608
tqdm_batches = self.tqdm_bar(batches)
603609

604610
for batch in tqdm_batches:
605-
batch_payload = {ANNOTATIONS_KEY: batch}
611+
batch_payload = construct_box_predictions_payload(
612+
annotations,
613+
update,
614+
)
606615
response = self._make_request(
607616
batch_payload, f"modelRun/{model_run_id}/predict"
608617
)
@@ -612,6 +621,9 @@ def predict(
612621
agg_response[PREDICTIONS_PROCESSED_KEY] += response[
613622
PREDICTIONS_PROCESSED_KEY
614623
]
624+
agg_response[PREDICTIONS_IGNORED_KEY] += response[
625+
PREDICTIONS_IGNORED_KEY
626+
]
615627

616628
return agg_response
617629
# return self._make_request(payload, f"modelRun/{model_run_id}/predict")
@@ -699,7 +711,7 @@ def predictions_ref_id(self, model_run_id: str, ref_id: str):
699711
:param reference_id: reference_id of a dataset item.
700712
:return:
701713
{
702-
"annotations": List[Box2DPrediction],
714+
"annotations": List[BoxPrediction],
703715
}
704716
"""
705717
return self._make_request(
@@ -724,7 +736,7 @@ def predictions_iloc(self, model_run_id: str, i: int):
724736
:param i: absolute number of Dataset Item for a dataset corresponding to the model run.
725737
:return:
726738
{
727-
"annotations": List[Box2DPrediction],
739+
"annotations": List[BoxPrediction],
728740
}
729741
"""
730742
return self._make_request(
@@ -753,7 +765,7 @@ def predictions_loc(self, model_run_id: str, dataset_item_id: str):
753765
:param dataset_item_id: dataset_item_id of a dataset item.
754766
:return:
755767
{
756-
"annotations": List[Box2DPrediction],
768+
"annotations": List[BoxPrediction],
757769
}
758770
"""
759771
return self._make_request(

nucleus/annotation.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from enum import Enum
22
from typing import Dict, Optional, Any, Union, List
33
from .constants import (
4+
ANNOTATION_ID_KEY,
45
DATASET_ITEM_ID_KEY,
56
REFERENCE_ID_KEY,
67
METADATA_KEY,
@@ -32,8 +33,9 @@ def __init__(
3233
y: Union[float, int],
3334
width: Union[float, int],
3435
height: Union[float, int],
35-
reference_id: str = None,
36-
item_id: str = None,
36+
reference_id: Optional[str] = None,
37+
item_id: Optional[str] = None,
38+
annotation_id: Optional[str] = None,
3739
metadata: Optional[Dict] = None,
3840
):
3941
if bool(reference_id) == bool(item_id):
@@ -47,6 +49,7 @@ def __init__(
4749
self.height = height
4850
self.reference_id = reference_id
4951
self.item_id = item_id
52+
self.annotation_id = annotation_id
5053
self.metadata = metadata if metadata else {}
5154

5255
@classmethod
@@ -60,6 +63,7 @@ def from_json(cls, payload: dict):
6063
height=geometry.get(HEIGHT_KEY, 0),
6164
reference_id=payload.get(REFERENCE_ID_KEY, None),
6265
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
66+
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
6367
metadata=payload.get(METADATA_KEY, {}),
6468
)
6569

@@ -74,6 +78,7 @@ def to_payload(self) -> dict:
7478
HEIGHT_KEY: self.height,
7579
},
7680
REFERENCE_ID_KEY: self.reference_id,
81+
ANNOTATION_ID_KEY: self.annotation_id,
7782
METADATA_KEY: self.metadata,
7883
}
7984

@@ -87,8 +92,9 @@ def __init__(
8792
self,
8893
label: str,
8994
vertices: List[Any],
90-
reference_id: str = None,
91-
item_id: str = None,
95+
reference_id: Optional[str] = None,
96+
item_id: Optional[str] = None,
97+
annotation_id: Optional[str] = None,
9298
metadata: Optional[Dict] = None,
9399
):
94100
if bool(reference_id) == bool(item_id):
@@ -99,6 +105,7 @@ def __init__(
99105
self.vertices = vertices
100106
self.reference_id = reference_id
101107
self.item_id = item_id
108+
self.annotation_id = annotation_id
102109
self.metadata = metadata if metadata else {}
103110

104111
@classmethod
@@ -109,6 +116,7 @@ def from_json(cls, payload: dict):
109116
vertices=geometry.get(VERTICES_KEY, []),
110117
reference_id=payload.get(REFERENCE_ID_KEY, None),
111118
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
119+
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
112120
metadata=payload.get(METADATA_KEY, {}),
113121
)
114122

@@ -118,8 +126,9 @@ def to_payload(self) -> dict:
118126
TYPE_KEY: POLYGON_TYPE,
119127
GEOMETRY_KEY: {VERTICES_KEY: self.vertices},
120128
REFERENCE_ID_KEY: self.reference_id,
129+
ANNOTATION_ID_KEY: self.annotation_id,
121130
METADATA_KEY: self.metadata,
122131
}
123132

124133
def __str__(self):
125-
return str(self.to_payload())
134+
return str(self.to_payload())

nucleus/constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,13 @@
1313
ERROR_PAYLOAD = "error_payload"
1414
ERROR_CODES = "error_codes"
1515
ANNOTATIONS_KEY = "annotations"
16+
ANNOTATION_ID_KEY = "annotation_id"
1617
ANNOTATIONS_PROCESSED_KEY = "annotations_processed"
18+
ANNOTATIONS_IGNORED_KEY = "annotations_ignored"
1719
PREDICTIONS_PROCESSED_KEY = "predictions_processed"
20+
PREDICTIONS_IGNORED_KEY = "predictions_ignored"
21+
ANNOTATION_UPDATE_KEY = "update"
22+
DEFAULT_ANNOTATION_UPDATE_MODE = False
1823
STATUS_CODE_KEY = "status_code"
1924
SUCCESS_STATUS_CODES = [200, 201]
2025
ERRORS_KEY = "errors"

nucleus/dataset.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
REFERENCE_IDS_KEY,
1111
NAME_KEY,
1212
ITEM_KEY,
13+
DEFAULT_ANNOTATION_UPDATE_MODE,
1314
ANNOTATIONS_KEY,
1415
)
1516
from .payload_constructor import construct_model_run_creation_payload
1617

17-
1818
class Dataset:
1919
"""
2020
Nucleus Dataset. You can append images with metadata to your dataset,
@@ -88,6 +88,7 @@ def create_model_run(
8888
def annotate(
8989
self,
9090
annotations: List[Union[BoxAnnotation, PolygonAnnotation]],
91+
update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
9192
batch_size: int = 20,
9293
) -> dict:
9394
"""
@@ -103,7 +104,7 @@ def annotate(
103104
}
104105
"""
105106
return self._client.annotate_dataset(
106-
self.id, annotations, batch_size=batch_size
107+
self.id, annotations, update=update, batch_size=batch_size
107108
)
108109

109110
def ingest_tasks(self, task_ids: dict):
@@ -165,7 +166,7 @@ def refloc(self, reference_id: str) -> dict:
165166
:return:
166167
{
167168
"item": DatasetItem,
168-
"annotations": List[Box2DAnnotation],
169+
"annotations": List[Union[BoxAnnotation, PolygonAnnotation]],
169170
}
170171
"""
171172
response = self._client.dataitem_ref_id(self.id, reference_id)

nucleus/model_run.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from typing import Optional, List, Dict, Any, Union
2-
from .constants import ANNOTATIONS_KEY
2+
from .constants import ANNOTATIONS_KEY, DEFAULT_ANNOTATION_UPDATE_MODE
33
from .prediction import BoxPrediction, PolygonPrediction
44
from .payload_constructor import construct_box_predictions_payload
55

@@ -61,30 +61,27 @@ def commit(self, payload: Optional[dict] = None) -> dict:
6161
return self._client.commit_model_run(self.model_run_id, payload)
6262

6363
def predict(
64-
self, annotations: List[Union[BoxPrediction, PolygonPrediction]]
64+
self,
65+
annotations: List[Union[BoxPrediction, PolygonPrediction]],
66+
update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
6567
) -> dict:
6668
"""
6769
Uploads model outputs as predictions for a model_run. Returns info about the upload.
6870
:param annotations: List[Union[BoxPrediction, PolygonPrediction]],
6971
:return:
7072
{
71-
"dataset_id": str,
7273
"model_run_id": str,
73-
"annotations_processed: int,
74+
"predictions_processed": int,
75+
"predictions_ignored": int,
7476
}
7577
"""
76-
payload: Dict[str, List[Any]] = construct_box_predictions_payload(
77-
annotations
78-
)
79-
return self._client.predict(self.model_run_id, payload)
78+
return self._client.predict(self.model_run_id, annotations, update)
8079

8180
def iloc(self, i: int):
8281
"""
8382
Returns Model Run Info For Dataset Item by its number.
8483
:param i: absolute number of Dataset Item for a dataset corresponding to the model run.
85-
:return:
86-
{
87-
"annotations": List[Union[BoxPrediction, PolygonPrediction]],
84+
:return: List[Union[BoxPrediction, PolygonPrediction]],
8885
}
8986
"""
9087
response = self._client.predictions_iloc(self.model_run_id, i)
@@ -94,10 +91,7 @@ def refloc(self, reference_id: str):
9491
"""
9592
Returns Model Run Info For Dataset Item by its reference_id.
9693
:param reference_id: reference_id of a dataset item.
97-
:return:
98-
{
99-
"annotations": List[Union[BoxPrediction, PolygonPrediction]],
100-
}
94+
:return: List[Union[BoxPrediction, PolygonPrediction]],
10195
"""
10296
response = self._client.predictions_ref_id(
10397
self.model_run_id, reference_id

0 commit comments

Comments
 (0)