update Slice docs (#140)

drakejwong · web-flow · commit 6dbeb78e0208 · 2021-10-25T11:35:05.000-07:00
* adjust sphinx config

* update gitignore

* update module docstring

* some docstring updates

* blocks

* checkpoint

* finish docstrings

* lint

* add example snippets
diff --git a/.gitignore b/.gitignore
@@ -70,7 +70,7 @@ instance/
 
 # Sphinx documentation
 docs/_build/
-docs/_autosummary/
+docs/api/
 
 # PyBuilder
 target/
diff --git a/docs/Makefile b/docs/Makefile
@@ -6,7 +6,7 @@
 SPHINXOPTS    ?=
 SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = .
-BUILDDIR      = build
+BUILDDIR      = _build
 
 # Put it first so that "make" without argument is like "make help".
 help:
diff --git a/docs/api.rst b/docs/api.rst
@@ -2,7 +2,7 @@ API
 ===
 
 .. autosummary::
-   :toctree: _autosummary
+   :toctree: api
    :template: nucleus-module.rst
    :recursive:
 
diff --git a/docs/conf.py b/docs/conf.py
@@ -13,7 +13,7 @@
 import os
 import sys
 
-sys.path.insert(0, os.path.abspath("../.."))
+sys.path.insert(0, os.path.abspath("../../"))
 
 
 # -- Project information -----------------------------------------------------
@@ -61,4 +61,3 @@
 html_static_path = ["_static"]
 
 autosummary_generate = True
-autosummary_imported_members = True
diff --git a/nucleus/slice.py b/nucleus/slice.py
@@ -1,3 +1,13 @@
+"""Slices are subsets of your Dataset that unlock curation and exploration workflows.
+
+Instead of thinking of your Datasets as collections of data, it is useful to think
+about them as a collection of Slices. For instance, your dataset may contain
+different weather scenarios, traffic conditions, or highway types.
+
+Perhaps your Models perform poorly on foggy weather scenarios; it is then useful
+to slice your dataset into a "foggy" slice, and fine-tune model performance on
+this slice until it reaches the performance you desire.
+"""
 from typing import Dict, Iterable, List, Set, Tuple, Union
 
 import requests
@@ -12,9 +22,7 @@
 
 
 class Slice:
-    """
-    Slice respesents a subset of your Dataset.
-    """
+    """A Slice represents a subset of DatasetItems in your Dataset."""
 
     def __init__(self, slice_id: str, client):
         self.slice_id = slice_id
@@ -32,21 +40,41 @@ def __eq__(self, other):
 
     @property
     def dataset_id(self):
-        """The id of the dataset this slice belongs to."""
+        """The ID of the Dataset to which the Slice belongs."""
         if self._dataset_id is None:
             self.info()
         return self._dataset_id
 
     def info(self) -> dict:
-        """
-        This endpoint provides information about specified slice.
-
-        :return:
-        {
-            "name": str,
-            "dataset_id": str,
-            "dataset_items",
-        }
+        """Retrieves info and items of the Slice. ::
+
+            import nucleus
+            client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+            slice = client.get_slice("slc_bx86ea222a6g057x4380")
+
+            slice.info()
+
+        Returns:
+            A dict mapping keys to the corresponding info retrieved. ::
+
+                {
+                    "name": "foggy",
+                    "dataset_id": "ds_bw6de8s84pe0vbn6p5zg",
+                    "dataset_items": [
+                        {
+                            "id": "di_bx79jc134x5w2janra10",
+                            "metadata": {},
+                            "ref_id": "image_ref_300000",
+                            "original_image_url": "s3://bucket-and-key"
+                        },
+                        {
+                            "id": "di_5x79jc134x5w2jantr30",
+                            "metadata": {},
+                            "ref_id": "image_ref_300001",
+                            "original_image_url": "s3://bucket-and-key"
+                        },
+                    ],
+                }
         """
         info = self._client.slice_info(self.slice_id)
         self._dataset_id = info["dataset_id"]
@@ -56,18 +84,27 @@ def append(
         self,
         reference_ids: List[str] = None,
     ) -> dict:
-        """
-        Appends to a slice from items already present in a dataset.
-        The caller must exclusively use either datasetItemIds or reference_ids
-        as a means of identifying items in the dataset.
+        """Appends existing DatasetItems from a Dataset to a Slice.
+
+        The endpoint expects a list of DatasetItem reference IDs which are set
+        at upload time. ::
 
-        :param
-        reference_ids: List[str],
+            import nucleus
+            client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+            slice = client.get_slice("slc_bx86ea222a6g057x4380")
 
-        :return:
-        {
-            "slice_id": str,
-        }
+            # You can append to a slice from existing reference_ids
+            slice.append(reference_ids=["image_300000", "image_300001"])
+
+        Args:
+            reference_ids:
+                A list of user-specified IDs for DatasetItems you wish to append.
+
+        Returns:
+            A dict of the slice_id and the newly appended DatasetItem IDs. ::
+
+                {"slice_id": "slc_bx86ea222a6g057x4380",
+                  "new_items": ["di_bx79jc1z4x5wvjenra10", "di_bx79ha5z4x5wvjenr9y0"]}
         """
         response = self._client.append_to_slice(
             slice_id=self.slice_id,
@@ -78,15 +115,28 @@ def append(
     def items_and_annotation_generator(
         self,
     ) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
-        """Returns an iterable of all DatasetItems and Annotations in this slice.
+        """Provides a generator of all DatasetItems and Annotations in the slice. ::
+
+                import nucleus
+                client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+                slice = client.get_slice("slc_bx86ea222a6g057x4380")
+
+                slice.items_and_annotation_generator()
 
         Returns:
-            An iterable, where each item is a dict with two keys representing a row
-            in the dataset.
-            * One value in the dict is the DatasetItem, containing a reference to the
-                item that was annotated, for example an image_url.
-            * The other value is a dictionary containing all the annotations for this
-                dataset item, sorted by annotation type.
+            A generator where each element is a dict containing the DatasetItem
+            and all of its associated Annotations, grouped by type. ::
+
+                Iterable([
+                    {"item": DatasetItem(image_location="s3://bucket-and-key",
+                                         reference_id="image_ref_300000",
+                                         metadata={},
+                                         pointcloud_location=None,
+                                         upload_to_scale=True),
+                      "annotations": {"box": [BoxAnnotation ... ],
+                                      "segmentation": [SegmentationAnnotation ... ]}},
+                    ...
+                ])
         """
         info = self.info()
         for item_metadata in info["dataset_items"]:
@@ -100,15 +150,28 @@ def items_and_annotation_generator(
     def items_and_annotations(
         self,
     ) -> List[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
-        """Returns a list of all DatasetItems and Annotations in this slice.
+        """Provides a list of all DatasetItems and Annotations in the Slice. ::
+
+                import nucleus
+                client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+                slice = client.get_slice("slc_bx86ea222a6g057x4380")
+
+                slice.items_and_annotations()
 
         Returns:
-            A list, where each item is a dict with two keys representing a row
-            in the dataset.
-            * One value in the dict is the DatasetItem, containing a reference to the
-                item that was annotated.
-            * The other value is a dictionary containing all the annotations for this
-                dataset item, sorted by annotation type.
+            A list where each element is a dict containing the DatasetItem
+            and all of its associated Annotations, grouped by type (e.g. box). ::
+
+                [
+                    {"item": DatasetItem(image_location="s3://bucket-and-key",
+                                         reference_id="image_ref_300000",
+                                         metadata={},
+                                         pointcloud_location=None,
+                                         upload_to_scale=True),
+                      "annotations": {"box": [BoxAnnotation ... ],
+                                      "segmentation": [SegmentationAnnotation ... ]}},
+                    ...
+                ]
         """
         api_payload = self._client.make_request(
             payload=None,
@@ -118,6 +181,27 @@ def items_and_annotations(
         return convert_export_payload(api_payload[EXPORTED_ROWS])
 
     def send_to_labeling(self, project_id: str):
+        """Send items in the Slice as tasks to a Scale labeling project.
+
+        This endpoint submits the items of the Slice as tasks to a pre-existing Scale Annotation project uniquely identified by project_id. Only projects of type General Image Annotation are currently supported. Additionally, in order for task submission to succeed, the project must have task instructions and geometries configured as project-level parameters. In order to create a project or set project parameters, you must use the Scale Annotation API, which is documented here: `Scale Annotation API Documentation <https://docs.scale.com/reference/project-overview>`_. When the newly created annotation tasks are annotated, the annotations will be automatically reflected in the Nucleus platform.
+
+        For self-serve projects, users can choose to submit the slice as a calibration batch, which is recommended for brand new labeling projects. For more information about calibration batches, please refer to `Overview of Self Serve Workflow <https://docs.scale.com/reference/batch-overview>`_. Note: A batch can be either a calibration batch or a self-label batch, but not both.
+
+        Note: Nucleus only supports bounding box, polygon, and line annotations. If the project parameters specify any other geometries (ellipses or points), those objects will be annotated, but they will not be reflected in Nucleus. ::
+
+            import nucleus
+            client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+            slice = client.get_slice("slc_bx86ea222a6g057x4380")
+
+            project_id = "2408bfb36443d50025f41bbd"
+            job = slice.send_to_labeling(project_id)
+            job.sleep_until_complete() # block until async job complete
+
+        Args:
+            project_id: A unique id of the target annotation project.
+            calibration_batch: (Relevant to Scale Rapid projects only) An optional boolean signaling whether to send as a "calibration batch" for taskers to preliminarily evaluate your project instructions and parameters.
+            self_label_batch: (Relevant to Scale Rapid projects only) An optional boolean signaling whether to send as a "self-label batch," in which your team can label internally through Scale Rapid.
+        """
         response = self._client.make_request(
             {}, f"slice/{self.slice_id}/{project_id}/send_to_labeling"
         )
@@ -126,13 +210,22 @@ def send_to_labeling(self, project_id: str):
     def export_embeddings(
         self,
     ) -> List[Dict[str, Union[str, List[float]]]]:
-        """Returns a pd.Dataframe-ready format of dataset embeddings.
+        """Provides a pd.DataFrame-like list of dataset embeddings. ::
+
+            import nucleus
+            client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
+            slice = client.get_slice("slc_bx86ea222a6g057x4380")
+
+            slice.export_embeddings()
 
         Returns:
-            A list, where each item is a dict with two keys representing a row
-            in the dataset.
-            * One value in the dict is the reference id
-            * The other value is a list of the embedding values
+            A list where each element is a columnar mapping ::
+
+                [
+                    {"embedding_vector": [-0.0022, 0.0457, ... ],
+                     "reference_id": "image_ref_300000"},
+                    ...
+                ]
         """
         api_payload = self._client.make_request(
             payload=None,
@@ -145,17 +238,20 @@ def export_embeddings(
 def check_annotations_are_in_slice(
     annotations: List[Annotation], slice_to_check: Slice
 ) -> Tuple[bool, Set[str]]:
-    """Check membership of the annotation targets within this slice.
+    """Checks whether the supplied Annotation objects exist in the supplied Slice.
 
-    annotations: Annnotations with ids referring to targets.
-    slice: The slice to check against.
+    This endpoint checks whether each Annotation object's reference ID (of the
+    parent DatasetItem) exists in the Slice.
 
+    Args:
+        annotations: Annotations with ids referring to targets.
+        slice_to_check: The slice to check against.
 
     Returns:
-        A tuple, where the first element is true/false whether the annotations are all
-        in the slice.
-        The second element is the list of item_ids not in the slice.
-        The third element is the list of ref_ids not in the slice.
+        A tuple of two elements.
+
+        #. True if all Annotations are in the Slice, False otherwise;
+        #. Set of reference IDs not in the Slice.
     """
     info = slice_to_check.info()
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -51,6 +51,8 @@ coverage = "^5.5"
 pre-commit = "^2.12.1"
 jupyterlab = "^3.1.10"
 absl-py = "^0.13.0"
+Sphinx = "^4.2.0"
+sphinx-autobuild = "^2021.3.14"
 furo = "^2021.10.9"
 
 [tool.pytest.ini_options]