Skip to content

Commit 228ef92

Browse files
committed
Implemented, no tests yet
1 parent b6c2f0a commit 228ef92

File tree

3 files changed

+127
-19
lines changed

3 files changed

+127
-19
lines changed

.pre-commit-config.yaml

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,35 @@
1+
fail_fast: true
12
repos:
2-
- repo: https://github.com/ambv/black
3-
rev: stable
3+
- repo: local
44
hooks:
5-
- id: black
5+
- id: system
6+
name: Black
7+
entry: poetry run black .
8+
pass_filenames: false
9+
language: system
610

7-
- repo: https://gitlab.com/pycqa/flake8
8-
rev: 3.7.9
11+
- repo: local
912
hooks:
10-
- id: flake8
13+
- id: system
14+
name: flake8
15+
entry: poetry run flake8 .
16+
pass_filenames: false
17+
language: system
1118

12-
- repo: https://github.com/pre-commit/mirrors-mypy
13-
rev: 'v0.720'
19+
- repo: local
1420
hooks:
15-
- id: mypy
16-
args: [--ignore-missing-imports]
21+
- id: system
22+
name: pylint
23+
entry: poetry run pylint nucleus
24+
pass_filenames: false
25+
language: system
1726

18-
- repo: https://github.com/pre-commit/mirrors-pylint
19-
rev: v2.3.1
27+
- repo: local
2028
hooks:
21-
- id: pylint
29+
- id: system
30+
name: mypy
31+
entry: poetry run mypy --ignore-missing-imports nucleus
32+
pass_filenames: false
33+
language: system
34+
35+

nucleus/dataset.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
from typing import List, Dict, Any, Optional, Union
1+
from typing import List, Dict, Any, Optional
22
from .dataset_item import DatasetItem
33
from .annotation import (
44
Annotation,
5-
BoxAnnotation,
6-
PolygonAnnotation,
75
)
86
from .constants import (
97
DATASET_NAME_KEY,
@@ -109,7 +107,7 @@ def create_model_run(
109107

110108
def annotate(
111109
self,
112-
annotations: List[Union[BoxAnnotation, PolygonAnnotation]],
110+
annotations: List[Annotation],
113111
update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
114112
batch_size: int = 5000,
115113
) -> dict:

nucleus/slice.py

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,41 @@
1-
from typing import List
1+
from __future__ import annotations
2+
3+
from typing import List, Iterable, Set, Tuple, Optional
4+
from nucleus.dataset_item import DatasetItem
5+
from nucleus.annotation import Annotation
6+
7+
from .constants import DEFAULT_ANNOTATION_UPDATE_MODE
8+
9+
10+
def check_annotations_are_in_slice(
    annotations: List["Annotation"], slice_to_check: "Slice"
) -> Tuple[bool, Set[str], Set[str]]:
    """Check that every annotation targets an item contained in a slice.

    Args:
        annotations: Annotations whose ``item_id`` / ``reference_id`` refer
            to the items they target. Ids that are ``None`` are ignored.
        slice_to_check: The slice to check against. Its ``info()`` method is
            called once to obtain the slice's items.

    Returns:
        A tuple ``(annotations_are_in_slice, item_ids_not_found_in_slice,
        reference_ids_not_found_in_slice)``. The flag is ``True`` only when
        both sets are empty.
    """
    # info() returns a dict; the per-item metadata lives under
    # "dataset_items". Iterating the dict itself would only yield its string
    # keys, and indexing those with "id" raises TypeError.
    slice_items = slice_to_check.info()["dataset_items"]
    item_ids_in_slice = {item_metadata["id"] for item_metadata in slice_items}
    reference_ids_in_slice = {
        item_metadata["reference_id"] for item_metadata in slice_items
    }

    item_ids_not_found_in_slice = {
        annotation.item_id
        for annotation in annotations
        if annotation.item_id is not None
    } - item_ids_in_slice
    reference_ids_not_found_in_slice = {
        annotation.reference_id
        for annotation in annotations
        if annotation.reference_id is not None
    } - reference_ids_in_slice

    annotations_are_in_slice = not (
        item_ids_not_found_in_slice or reference_ids_not_found_in_slice
    )
    return (
        annotations_are_in_slice,
        item_ids_not_found_in_slice,
        reference_ids_not_found_in_slice,
    )
239

340

441
class Slice:
@@ -9,6 +46,7 @@ class Slice:
946
# Bind this Slice to a slice id and to the client object used for all requests.
def __init__(self, slice_id: str, client):
1047
self.slice_id = slice_id
1148
self._client = client
49+
# Not known at construction time; info() assigns it from the slice_info response.
self._dataset_id = None
1250

1351
# Debug representation showing the slice id and the client this slice is bound to.
def __repr__(self):
1452
return f"Slice(slice_id='{self.slice_id}', client={self._client})"
@@ -19,6 +57,13 @@ def __eq__(self, other):
1957
return True
2058
return False
2159

60+
@property
61+
def dataset_id(self):
62+
"""The id of the dataset this slice belongs to, fetched via info() on first access."""
63+
# None means info() has not populated the cached value yet.
if self._dataset_id is None:
64+
# Called for its side effect: info() stores info["dataset_id"] on self._dataset_id.
self.info()
65+
return self._dataset_id
66+
2267
def info(self) -> dict:
2368
"""
2469
This endpoint provides information about specified slice.
@@ -30,7 +75,9 @@ def info(self) -> dict:
3075
"dataset_items",
3176
}
3277
"""
33-
return self._client.slice_info(self.slice_id)
78+
info = self._client.slice_info(self.slice_id)
79+
self._dataset_id = info["dataset_id"]
80+
return info
3481

3582
def append(
3683
self,
@@ -57,3 +104,52 @@ def append(
57104
reference_ids=reference_ids,
58105
)
59106
return response
107+
108+
def items_generator(self) -> Iterable["DatasetItem"]:
    """Lazily yield the items in this slice, one client lookup per item.

    Calls ``info()`` once to list the slice's items, then yields the result
    of the client's ``dataitem_loc`` for each of them.

    Yields:
        Whatever ``self._client.dataitem_loc`` returns for each item id
        listed under ``"dataset_items"`` in the slice info.
    """
    info = self.info()
    dataset_id = info["dataset_id"]
    for item_metadata in info["dataset_items"]:
        # dataitem_loc is looked up on the client, so it is already bound to
        # it; the Slice must not be passed as an extra positional argument.
        # NOTE(review): assumes dataitem_loc(dataset_id, dataset_item_id) --
        # confirm against the client's signature.
        yield self._client.dataitem_loc(
            dataset_id=dataset_id,
            dataset_item_id=item_metadata["id"],
        )
117+
118+
def items(self) -> List[DatasetItem]:
119+
"""Returns a list of all DatasetItems in this slice."""
120+
# Drains items_generator() eagerly, so every item is fetched before this returns.
return list(self.items_generator())
121+
122+
def annotate(
    self,
    annotations: List[Annotation],
    update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
    batch_size: int = 5000,
    strict=True,
):
    """Upload annotations for items in this slice.

    Args:
        annotations: List of annotations to upload.
        update: Forwarded unchanged to the client's ``annotate_dataset``.
        batch_size: How many annotations to send per request.
        strict: Whether to first check that the annotations belong to this
            slice. Set to false to avoid this check and speed up upload.

    Returns:
        Whatever the client's ``annotate_dataset`` call returns.

    Raises:
        ValueError: If ``strict`` is true and any annotation refers to an
            item id or reference id that is not part of this slice.
    """
    if strict:
        (
            annotations_are_in_slice,
            item_ids_not_found_in_slice,
            reference_ids_not_found_in_slice,
        ) = check_annotations_are_in_slice(annotations, self)
        if not annotations_are_in_slice:
            message = "Not all annotations are in this slice.\n"
            if item_ids_not_found_in_slice:
                message += f"Item ids not found in slice: {item_ids_not_found_in_slice} \n"
            if reference_ids_not_found_in_slice:
                message += f"Reference ids not found in slice: {reference_ids_not_found_in_slice}"
            raise ValueError(message)
    # Hand the client's response back to the caller instead of discarding it.
    return self._client.annotate_dataset(
        dataset_id=self.dataset_id,
        annotations=annotations,
        update=update,
        batch_size=batch_size,
    )

0 commit comments

Comments
 (0)