
Commit 529fb01

Explain EvaluationCriterion "magic" and documenting Pydantic usage (#198)

* Add initial README around EvaluationCriterion
* Add Pydantic section to README.md
* Move EvaluationCriteria README to docstring
* Minor markdown problem

1 parent 77dd069 commit 529fb01

4 files changed: +123 -3 lines changed

README.md

Lines changed: 84 additions & 0 deletions
@@ -62,6 +62,90 @@ poetry run pytest tests/test_dataset.py
## Pydantic Models

Prefer using [Pydantic](https://pydantic-docs.helpmanual.io/usage/models/) models rather than raw dictionaries
or dataclasses for payloads sent or received over the wire as JSON. Pydantic is built with data validation in mind and provides very clear
error messages when it encounters a problem with the payload.

The Pydantic model(s) should mirror the payload to send. To represent a JSON payload that looks like this:
```json
{
  "example_json_with_info": {
    "metadata": {
      "frame": 0
    },
    "reference_id": "frame0",
    "url": "s3://example/scale_nucleus/2021/lidar/0038711321865000.json",
    "type": "pointcloud"
  },
  "example_image_with_info": {
    "metadata": {
      "author": "Picasso"
    },
    "reference_id": "frame0",
    "url": "s3://bucket/0038711321865000.jpg",
    "type": "image"
  }
}
```

the payload could be represented as the following structure. Note how the field names map to the JSON keys, and note the use of field
validators (`@validator`):
```python
import os.path
from typing import Literal

from pydantic import BaseModel, validator


class JsonWithInfo(BaseModel):
    metadata: dict  # any dict is valid
    reference_id: str
    url: str
    type: Literal["pointcloud", "recipe"]

    @validator("url")
    def has_json_extension(cls, v):
        if not v.endswith(".json"):
            raise ValueError(f"Expected '.json' extension got {v}")
        return v


class ImageWithInfo(BaseModel):
    metadata: dict  # any dict is valid
    reference_id: str
    url: str
    type: Literal["image", "mask"]

    @validator("url")
    def has_valid_extension(cls, v):
        valid_extensions = {".jpg", ".jpeg", ".png", ".tiff"}
        _, extension = os.path.splitext(v)
        if extension not in valid_extensions:
            raise ValueError(f"Expected extension in {valid_extensions} got {v}")
        return v


class ExampleNestedModel(BaseModel):
    example_json_with_info: JsonWithInfo
    example_image_with_info: ImageWithInfo


# Usage:
import requests

payload = requests.get("https://example.com/example")  # requests needs an absolute URL
parsed_model = ExampleNestedModel.parse_obj(payload.json())
requests.post("https://example.com/example/post_to", json=parsed_model.dict())
```
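
For example, an invalid payload raises a descriptive `ValidationError` that names the offending field. A minimal sketch (the exact message text depends on your Pydantic version):

```python
from pydantic import ValidationError

try:
    JsonWithInfo(
        metadata={"frame": 0},
        reference_id="frame0",
        url="s3://example/not_a_json.txt",  # fails the has_json_extension validator
        type="pointcloud",
    )
except ValidationError as e:
    print(e)  # output points at "url" and repeats the validator's message
```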

### Migrating to Pydantic
- When migrating an interface from a dictionary, use `nucleus.pydantic_base.DictCompatibleModel`. That allows you to get
  the benefits of Pydantic while maintaining backwards compatibility with a Python dictionary by delegating `__getitem__` to
  fields (see the sketch after this list).
- When migrating a frozen dataclass, use `nucleus.pydantic_base.ImmutableModel`. That is a base class set up to be
  immutable after initialization.
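
A minimal sketch of the dictionary compatibility, assuming only the `__getitem__` delegation described above (the model and its fields are hypothetical):

```python
from nucleus.pydantic_base import DictCompatibleModel


class LegacyItem(DictCompatibleModel):  # hypothetical model for illustration
    reference_id: str
    url: str


item = LegacyItem(reference_id="frame0", url="s3://bucket/0038711321865000.jpg")
# Old call sites that treated the payload as a dict keep working:
assert item["reference_id"] == item.reference_id
```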

**Updating documentation:**
We use [Sphinx](https://www.sphinx-doc.org/en/master/) to autogenerate our API Reference from docstrings.

nucleus/modelci/__init__.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -3,6 +3,7 @@
 __all__ = [
     "ModelCI",
     "UnitTest",
+    "EvaluationCriterion",
 ]

 from .client import ModelCI
```

nucleus/modelci/client.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -67,8 +67,8 @@ def create_unit_test(
     Args:
         name: unique name of test
         slice_id: id of (pre-defined) slice of items to evaluate test on.
-        evaluation_criteria: Pass/fail criteria for the test. Created with a comparison with an eval functions.
-            See :class:`eval_functions`.
+        evaluation_criteria: :class:`EvaluationCriterion` defines a pass/fail criterion for the test. Created by
+            comparing with an eval function. See :class:`eval_functions`.

     Returns:
         Created UnitTest object.
@@ -117,7 +117,7 @@ def delete_unit_test(self, unit_test_id: str) -> bool:
         client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
         unit_test = client.modelci.list_unit_tests()[0]

-        success = client.modelci.create_unit_test(unit_test.id)
+        success = client.modelci.delete_unit_test(unit_test.id)

     Args:
         unit_test_id: unique ID of unit test
```

nucleus/modelci/data_transfer_objects/eval_function.py

Lines changed: 35 additions & 0 deletions
```diff
@@ -11,6 +11,41 @@ class EvaluationCriterion(ImmutableModel):

     An Evaluation Criterion is defined as an evaluation function, threshold, and comparator.
     It describes how to apply an evaluation function.

+    Notes:
+        To define the evaluation criteria for a scenario test we've created some syntactic sugar that makes it look
+        closer to an actual function call and hides away implementation details of our data model that simply are
+        not clear, UX-wise.
+
+        Instead of defining criteria like this::
+
+            from nucleus.modelci.data_transfer_objects.eval_function import (
+                EvaluationCriterion,
+                ThresholdComparison,
+            )
+
+            criteria = [
+                EvaluationCriterion(
+                    eval_function_id="ef_c6m1khygqk400918ays0",  # bbox_recall
+                    threshold_comparison=ThresholdComparison.GREATER_THAN,
+                    threshold=0.5,
+                ),
+            ]
+
+        we define it like this::
+
+            bbox_recall = client.modelci.eval_functions.bbox_recall
+            criteria = [
+                bbox_recall() > 0.5
+            ]
+
+        The chosen method allows us to document the available evaluation functions in an IDE-friendly fashion and
+        hides away details like internal IDs (`"ef_...."`).
+
+        The actual `EvaluationCriterion` is created by overloading the comparison operators on the base class of an
+        evaluation function. Instead of the comparison returning a bool, we've made it create an `EvaluationCriterion`
+        with the correct signature to send over the wire to our API.
+
     Parameters:
         eval_function_id (str): ID of evaluation function
         threshold_comparison (:class:`ThresholdComparison`): comparator for evaluation. i.e. threshold=0.5 and threshold_comparator > implies that a test only passes if score > 0.5.
```
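
To make the overloading trick concrete, here is a minimal sketch of the idea; the stand-in class and its single `__gt__` overload are illustrative, not the actual nucleus implementation:

```python
from nucleus.modelci.data_transfer_objects.eval_function import (
    EvaluationCriterion,
    ThresholdComparison,
)


class EvalFunctionSketch:
    """Illustrative stand-in for the eval function base class."""

    def __init__(self, eval_function_id: str):
        self.eval_function_id = eval_function_id

    def __gt__(self, threshold: float) -> EvaluationCriterion:
        # The comparison builds a criterion instead of returning a bool.
        return EvaluationCriterion(
            eval_function_id=self.eval_function_id,
            threshold_comparison=ThresholdComparison.GREATER_THAN,
            threshold=threshold,
        )


criteria = [EvalFunctionSketch("ef_c6m1khygqk400918ays0") > 0.5]
```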
