Commit 2687568

Merge pull request #143 from scaleapi/da-document-predictions
Document predictions, model, model_run and autocurate
2 parents dfaffe2 + eae5455 commit 2687568

File tree: 6 files changed, +145 -3 lines changed

docs/Makefile

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 # from the environment for the first two.
 SPHINXOPTS ?=
 SPHINXBUILD ?= sphinx-build
-SOURCEDIR = source
+SOURCEDIR = .
 BUILDDIR = build
 
 # Put it first so that "make" without argument is like "make help".
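Note: with SOURCEDIR now pointing at the docs directory itself instead of a source/ subfolder, Sphinx reads conf.py straight from docs/. As a rough sketch, the equivalent of `make html` through Sphinx's Python entry point would presumably be (output path inferred from BUILDDIR = build above)::

    from sphinx.cmd.build import build_main

    # Build HTML from docs/ (which now holds conf.py) into docs/build/html,
    # mirroring `make html` with the new SOURCEDIR.
    build_main(["-b", "html", "docs", "docs/build/html"])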

nucleus/autocurate.py

Lines changed: 7 additions & 0 deletions
@@ -1,3 +1,9 @@
+"""This module can be used to compute active learning metrics on your predictions.
+
+For more details on usage, see the example colab in scripts/autocurate_bdd.ipynb.
+"""
+
+
 import datetime
 import requests
 from nucleus.constants import (
@@ -9,6 +15,7 @@
 
 
 def entropy(name, model_run, client):
+    """Computes the mean entropy across all predictions for each image."""
     model_run_ids = [model_run.model_run_id]
     dataset_id = model_run.dataset_id
     response = client.make_request(
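For intuition about the entropy metric documented above: it is the Shannon entropy of each prediction's class probability distribution, averaged per image. A minimal local sketch (prediction_entropy is illustrative only, not part of the client, and the server-side aggregation is not shown in this diff)::

    import math

    def prediction_entropy(class_pdf: dict) -> float:
        # Shannon entropy of a class probability distribution, such as
        # BoxPrediction.class_pdf; higher values mean a less certain model.
        return -sum(p * math.log(p) for p in class_pdf.values() if p > 0)

    prediction_entropy({"label": 0.5, "other_label": 0.5})  # ~0.693, uncertain
    prediction_entropy({"label": 0.9, "other_label": 0.1})  # ~0.325, more confident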

nucleus/model.py

Lines changed: 83 additions & 0 deletions
@@ -1,3 +1,63 @@
+"""
+By uploading model predictions to Nucleus, you can compare your predictions to ground truth annotations and discover problems with your Models or Dataset.
+
+You can also upload predictions for unannotated images, letting you query them based on model predictions. This can help you prioritize which unlabeled data to label next.
+
+Within Nucleus, Models work in the following way:
+
+1. You first create a Model. You can do this just once and reuse the model on multiple datasets.
+2. You then upload predictions to a dataset.
+3. Finally, you trigger calculation of model metrics to view model debugging insights.
+
+Doing the three steps above allows you to visualize model performance within Nucleus, or compare multiple models that have been run on the same Dataset.
+
+
+Note that you can always add more predictions to a dataset, but you will then need to re-run the calculation of metrics for them to remain correct.
+
+::
+
+    import nucleus
+
+    client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+    dataset = client.get_dataset("YOUR_DATASET_ID")
+    prediction_1 = nucleus.BoxPrediction(
+        label="label",
+        x=0,
+        y=0,
+        width=10,
+        height=10,
+        reference_id="1",
+        confidence=0.9,
+        class_pdf={"label": 0.9, "other_label": 0.1},
+    )
+    prediction_2 = nucleus.BoxPrediction(
+        label="label",
+        x=0,
+        y=0,
+        width=10,
+        height=10,
+        reference_id="2",
+        confidence=0.2,
+        class_pdf={"label": 0.2, "other_label": 0.8},
+    )
+    model = client.add_model(
+        name="My Model", reference_id="My-CNN", metadata={"timestamp": "121012401"}
+    )
+    # For small ingestions, we recommend synchronous ingestion
+    response = dataset.upload_predictions(model, [prediction_1, prediction_2])
+    # For large ingestions, we recommend asynchronous ingestion
+    job = dataset.upload_predictions(
+        model, [prediction_1, prediction_2], asynchronous=True
+    )
+    # Check current status
+    job.status()
+    # Sleep until ingestion is done
+    job.sleep_until_complete()
+    # Check errors
+    job.errors()
+
+    dataset.calculate_evaluation_metrics(model)
+"""
 from typing import List, Optional, Dict, Union
 from .dataset import Dataset
 from .prediction import (
@@ -15,6 +75,19 @@
 
 
 class Model:
+    """A model that can be used to upload predictions to a dataset.
+
+    Attributes:
+        model_id: The Scale-generated unique id for this model.
+        name: A human-readable name for the model.
+        reference_id: A unique, user-controlled ID for the model. This can be
+            used, for example, to link to an external store of models which may
+            have its own id scheme.
+        metadata: An arbitrary dictionary of additional data about this model
+            that can be stored and retrieved. For example, you can store
+            information about the hyperparameters used in training this model.
+    """
+
     def __init__(
         self,
         model_id: str,
@@ -68,6 +141,16 @@ def create_run(
         metadata: Optional[Dict] = None,
         asynchronous: bool = False,
     ) -> ModelRun:
+        """Note: this method, and model runs in general, are now deprecated.
+
+        Models now automatically generate a model run when applied to a dataset
+        via dataset.upload_predictions(model, predictions), so there is no
+        longer any need to explicitly create a model run before uploading
+        predictions.
+
+        When uploading to a dataset twice using the same model, the same model
+        run will be reused by Nucleus.
+        """
         payload: dict = {
             NAME_KEY: name,
             REFERENCE_ID_KEY: self.reference_id,
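As the module docstring notes, more predictions can be added to a dataset at any time, but previously computed metrics then go stale. A short continuation of the docstring's own example (same names; more_predictions is a hypothetical later batch)::

    more_predictions = [prediction_2]
    dataset.upload_predictions(model, more_predictions)
    # Metrics computed earlier do not reflect the new predictions,
    # so trigger the calculation again:
    dataset.calculate_evaluation_metrics(model)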

nucleus/model_run.py

Lines changed: 16 additions & 2 deletions
@@ -1,3 +1,18 @@
+"""
+Model Runs are deprecated and will be removed in a future version of the python client.
+It is now possible to upload model predictions without creating a model run.
+
+For example:
+
+.. code-block:: python
+
+    import nucleus
+
+    client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+    dataset = client.get_dataset("YOUR_DATASET_ID")
+    prediction_1 = nucleus.BoxPrediction(label="label", x=0, y=0, width=10, height=10, reference_id="1", confidence=0.9, class_pdf={"label": 0.9, "other_label": 0.1})
+    prediction_2 = nucleus.BoxPrediction(label="label", x=0, y=0, width=10, height=10, reference_id="2", confidence=0.2, class_pdf={"label": 0.2, "other_label": 0.8})
+    model = client.add_model(name="My Model", reference_id="My-CNN", metadata={"timestamp": "121012401"})
+    response = dataset.upload_predictions(model, [prediction_1, prediction_2])
+"""
+
 from typing import List, Optional, Union
 
 import requests
@@ -26,8 +41,7 @@
 
 class ModelRun:
     """
-    Model runs represent detections of a specific model on your dataset.
-    Having an open model run is a prerequisite for uploading predictions to your dataset.
+    This class is deprecated and will be removed from the python client.
     """
 
     def __init__(self, model_run_id: str, dataset_id: str, client):
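For callers migrating off ModelRun, the replacement flow is the one shown in the module docstring above: upload through the dataset and let Nucleus create or reuse the run. An asynchronous variant, assuming the same names and the job helpers from the model.py docstring earlier in this commit::

    job = dataset.upload_predictions(
        model, [prediction_1, prediction_2], asynchronous=True
    )
    job.sleep_until_complete()
    assert not job.errors()  # sketch: expect an empty error list on success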

nucleus/prediction.py

Lines changed: 36 additions & 0 deletions
@@ -1,3 +1,7 @@
+"""All of the prediction types supported. In general, prediction types are the same
+as annotation types, but come with additional, optional data that can be attached,
+such as confidence or probability distributions.
+"""
 from typing import Dict, Optional, List
 from .annotation import (
     BoxAnnotation,
@@ -66,6 +70,16 @@ def from_json(cls, payload: dict):
 
 
 class BoxPrediction(BoxAnnotation):
+    """A prediction of a bounding box.
+
+    Attributes:
+        confidence: 0-1 indicating the confidence of the prediction.
+        class_pdf: An optional complete class probability distribution on this
+            annotation. Each value should be between 0 and 1 (inclusive), and the
+            values should sum to 1 as a complete distribution. This can be useful
+            for computing entropy to surface places where the model is most
+            uncertain.
+    """
+
     def __init__(
         self,
         label: str,
@@ -119,6 +133,16 @@ def from_json(cls, payload: dict):
 
 
 class PolygonPrediction(PolygonAnnotation):
+    """A prediction of a polygon.
+
+    Attributes:
+        confidence: 0-1 indicating the confidence of the prediction.
+        class_pdf: An optional complete class probability distribution on this
+            annotation. Each value should be between 0 and 1 (inclusive), and the
+            values should sum to 1 as a complete distribution. This can be useful
+            for computing entropy to surface places where the model is most
+            uncertain.
+    """
+
     def __init__(
         self,
         label: str,
@@ -165,6 +189,16 @@ def from_json(cls, payload: dict):
 
 
 class CuboidPrediction(CuboidAnnotation):
+    """A prediction of a 3D cuboid.
+
+    Attributes:
+        confidence: 0-1 indicating the confidence of the prediction.
+        class_pdf: An optional complete class probability distribution on this
+            annotation. Each value should be between 0 and 1 (inclusive), and the
+            values should sum to 1 as a complete distribution. This can be useful
+            for computing entropy to surface places where the model is most
+            uncertain.
+    """
+
     def __init__(
         self,
         label: str,
@@ -215,6 +249,8 @@ def from_json(cls, payload: dict):
 
 
 class CategoryPrediction(CategoryAnnotation):
+    """This class is not yet supported: Categorization support coming soon!"""
+
     def __init__(
         self,
         label: str,
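Because predictions mirror the annotation types and only add confidence data, the main new invariant is that class_pdf be a valid distribution, as the docstrings above require. A small illustrative check (is_valid_class_pdf is hypothetical, not part of the client)::

    def is_valid_class_pdf(class_pdf: dict, tol: float = 1e-6) -> bool:
        # Every probability must lie in [0, 1] and the values must sum to 1.
        values = list(class_pdf.values())
        return all(0.0 <= p <= 1.0 for p in values) and abs(sum(values) - 1.0) < tol

    is_valid_class_pdf({"label": 0.9, "other_label": 0.1})  # True
    is_valid_class_pdf({"label": 0.9, "other_label": 0.3})  # False: sums to 1.2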

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -38,6 +38,7 @@ tqdm = "^4.41.0"
 dataclasses = { version = "^0.7", python = "^3.6.1, <3.7" }
 aiohttp = "^3.7.4"
 nest-asyncio = "^1.5.1"
+Sphinx = "^4.2.0"
 
 [tool.poetry.dev-dependencies]
 poetry = "^1.1.5"
@@ -50,6 +51,7 @@ coverage = "^5.5"
 pre-commit = "^2.12.1"
 jupyterlab = "^3.1.10"
 absl-py = "^0.13.0"
+furo = "^2021.10.9"
 
 [tool.pytest.ini_options]
 markers = [
