Implemented, no tests yet

ardila · ardila · commit 228ef928b81d · 2021-04-19T08:05:47.000-07:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,21 +1,35 @@
+fail_fast: true
 repos:
--   repo: https://github.com/ambv/black
-    rev: stable
+-   repo: local
     hooks:
-    - id: black
+    - id: black
+      name: Black
+      entry: poetry run black .
+      pass_filenames: false
+      language: system
 
--   repo: https://gitlab.com/pycqa/flake8
-    rev: 3.7.9
+-   repo: local
     hooks:
-    - id: flake8
+    - id: flake8
+      name: flake8
+      entry: poetry run flake8 .
+      pass_filenames: false
+      language: system
 
--   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: 'v0.720'
+-   repo: local
     hooks:
-    -   id: mypy
-        args: [--ignore-missing-imports]
+    - id: pylint
+      name: pylint
+      entry: poetry run pylint nucleus
+      pass_filenames: false
+      language: system
 
--   repo: https://github.com/pre-commit/mirrors-pylint
-    rev: v2.3.1
+-   repo: local
     hooks:
-    -   id: pylint
+    - id: mypy
+      name: mypy
+      entry: poetry run mypy --ignore-missing-imports nucleus
+      pass_filenames: false
+      language: system
+
+
diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -1,9 +1,7 @@
-from typing import List, Dict, Any, Optional, Union
+from typing import List, Dict, Any, Optional
 from .dataset_item import DatasetItem
 from .annotation import (
     Annotation,
-    BoxAnnotation,
-    PolygonAnnotation,
 )
 from .constants import (
     DATASET_NAME_KEY,
@@ -109,7 +107,7 @@ def create_model_run(
 
     def annotate(
         self,
-        annotations: List[Union[BoxAnnotation, PolygonAnnotation]],
+        annotations: List[Annotation],
         update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
         batch_size: int = 5000,
     ) -> dict:
diff --git a/nucleus/slice.py b/nucleus/slice.py
@@ -1,4 +1,41 @@
-from typing import List
+from __future__ import annotations
+
+from typing import List, Iterable, Set, Tuple, Optional
+from nucleus.dataset_item import DatasetItem
+from nucleus.annotation import Annotation
+
+from .constants import DEFAULT_ANNOTATION_UPDATE_MODE
+
+
+def check_annotations_are_in_slice(
+    annotations: List[Annotation], slice_to_check: Slice
+) -> Tuple[bool, Set[str], Set[str]]:
+    """Check that every annotation targets an item of ``slice_to_check``.
+
+    annotations: Annotations whose item_id / reference_id name their target.
+    slice_to_check: The slice to check against; its info() is fetched once.
+    Returns (all_found, missing_item_ids, missing_reference_ids).
+    """
+    # info() returns a dict; the per-item metadata lives under its
+    # "dataset_items" key, so iterate that list rather than the dict's keys.
+    items_in_slice = slice_to_check.info()["dataset_items"]
+    missing_item_ids = {
+        annotation.item_id
+        for annotation in annotations
+        if annotation.item_id is not None
+    }.difference({item["id"] for item in items_in_slice})
+    missing_reference_ids = {
+        annotation.reference_id
+        for annotation in annotations
+        if annotation.reference_id is not None
+    }.difference({item["reference_id"] for item in items_in_slice})
+    annotations_are_in_slice = not (missing_item_ids or missing_reference_ids)
+
+    return (
+        annotations_are_in_slice,
+        missing_item_ids,
+        missing_reference_ids,
+    )
 
 
 class Slice:
@@ -9,6 +46,7 @@ class Slice:
     def __init__(self, slice_id: str, client):
         self.slice_id = slice_id
         self._client = client
+        self._dataset_id = None
 
     def __repr__(self):
         return f"Slice(slice_id='{self.slice_id}', client={self._client})"
@@ -19,6 +57,13 @@ def __eq__(self, other):
                 return True
         return False
 
+    @property
+    def dataset_id(self):
+        """Id of the dataset this slice belongs to, fetched via info() when not yet cached."""
+        if self._dataset_id is None:
+            self.info()  # info() stores info["dataset_id"] on self._dataset_id
+        return self._dataset_id
+
     def info(self) -> dict:
         """
         This endpoint provides information about specified slice.
@@ -30,7 +75,9 @@ def info(self) -> dict:
             "dataset_items",
         }
         """
-        return self._client.slice_info(self.slice_id)
+        info = self._client.slice_info(self.slice_id)
+        self._dataset_id = info["dataset_id"]
+        return info
 
     def append(
         self,
@@ -57,3 +104,52 @@ def append(
             reference_ids=reference_ids,
         )
         return response
+
+    def items_generator(self) -> Iterable[DatasetItem]:
+        """Lazily yield one client.dataitem_loc() result per item listed in this slice's info()."""
+        info = self.info()
+        for item_metadata in info["dataset_items"]:
+            yield self._client.dataitem_loc(
+                self,  # NOTE(review): Slice passed positionally; confirm dataitem_loc accepts it
+                dataset_id=info["dataset_id"],
+                dataset_item_id=item_metadata["id"],
+            )
+
+    def items(self) -> List[DatasetItem]:
+        """Return every item of this slice as a list by draining items_generator()."""
+        return list(self.items_generator())
+
+    def annotate(
+        self,
+        annotations: List[Annotation],
+        update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
+        batch_size: int = 5000,
+        strict: bool = True,
+    ):
+        """Upload annotations to this slice's dataset and return the client's response.
+
+        Args:
+            annotations: List of annotations to upload.
+            update, batch_size: Forwarded to client.annotate_dataset (batch_size = annotations per request).
+            strict: Whether to first check that the annotations belong to this slice,
+                raising ValueError if not. Set to false to skip the check (faster).
+        """
+        if strict:
+            (
+                annotations_are_in_slice,
+                item_ids_not_found_in_slice,
+                reference_ids_not_found_in_slice,
+            ) = check_annotations_are_in_slice(annotations, self)
+            if not annotations_are_in_slice:
+                message = "Not all annotations are in this slice.\n"
+                if item_ids_not_found_in_slice:
+                    message += f"Item ids not found in slice: {item_ids_not_found_in_slice} \n"
+                if reference_ids_not_found_in_slice:
+                    message += f"Reference ids not found in slice: {reference_ids_not_found_in_slice}"
+                raise ValueError(message)
+        return self._client.annotate_dataset(
+            dataset_id=self.dataset_id,
+            annotations=annotations,
+            update=update,
+            batch_size=batch_size,
+        )