Skip to content

Commit 3eecdd7

Browse files
author
Diego Ardila
committed
merge master
2 parents 1672a14 + 41e93ca commit 3eecdd7

File tree

7 files changed

+47
-10
lines changed

7 files changed

+47
-10
lines changed

nucleus/dataset.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,11 @@
2222
REQUEST_ID_KEY,
2323
UPDATE_KEY,
2424
)
25-
from .dataset_item import DatasetItem, check_all_paths_remote
25+
from .dataset_item import (
26+
DatasetItem,
27+
check_all_paths_remote,
28+
check_for_duplicate_reference_ids,
29+
)
2630
from .payload_constructor import construct_model_run_creation_payload
2731

2832

@@ -191,6 +195,8 @@ def append(
191195
'ignored_items': int,
192196
}
193197
"""
198+
check_for_duplicate_reference_ids(dataset_items)
199+
194200
if asynchronous:
195201
check_all_paths_remote(dataset_items)
196202
request_id = uuid.uuid4().hex
@@ -199,8 +205,6 @@ def append(
199205
route=f"dataset/{self.id}/signedUrl/{request_id}",
200206
requests_command=requests.get,
201207
)
202-
print(response["signed_url"])
203-
204208
serialize_and_write_to_presigned_url(
205209
dataset_items, response["signed_url"]
206210
)

nucleus/dataset_item.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
from collections import Counter
12
import json
23
import os.path
34
from dataclasses import dataclass
4-
from typing import List, Optional
5+
from typing import Optional, Sequence
56

67
from .constants import (
78
DATASET_ITEM_ID_KEY,
@@ -58,10 +59,26 @@ def is_local_path(path: str) -> bool:
5859
return path_components[0] not in {"https:", "http:", "s3:", "gs:"}
5960

6061

61-
def check_all_paths_remote(dataset_items: List[DatasetItem]):
62+
def check_all_paths_remote(dataset_items: Sequence[DatasetItem]):
    """Raise ValueError for the first item whose image location is local.

    Each item's ``image_location`` is passed to ``is_local_path``; the first
    one it flags aborts the check with an error naming that location.
    """
    for entry in dataset_items:
        if not is_local_path(entry.image_location):
            continue
        raise ValueError(
            f"All paths must be remote, but {entry.image_location} is either "
            "local, or a remote URL type that is not supported."
        )
69+
70+
71+
def check_for_duplicate_reference_ids(dataset_items: "Sequence[DatasetItem]"):
    """Raise if two or more dataset items share the same reference id.

    Items whose ``reference_id`` is ``None`` are ignored, so any number of
    items may omit a reference id.

    Args:
        dataset_items: Items whose ``reference_id`` attributes are compared.

    Raises:
        ValueError: If any non-``None`` reference id occurs more than once.
            The message lists only the repeated ids with their counts.
    """
    occurrences = Counter(
        item.reference_id
        for item in dataset_items
        if item.reference_id is not None
    )
    # Report only ids that actually repeat; listing ids that occur once
    # would bury the real offenders in the error message.
    duplicates = {
        f"{ref_id}": f"Count: {count}"
        for ref_id, count in occurrences.items()
        if count > 1
    }
    if duplicates:
        raise ValueError(
            "Duplicate reference ids found among dataset_items: %s"
            % duplicates
        )

tests/test_annotation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def dataset(CLIENT):
5757
yield ds
5858

5959
response = CLIENT.delete_dataset(ds.id)
60-
assert response == {}
60+
assert response == {"message": "Beginning dataset deletion..."}
6161

6262

6363
def test_box_gt_upload(dataset):

tests/test_dataset.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def test_dataset_create_and_delete(CLIENT):
9696

9797
# Deletion
9898
response = CLIENT.delete_dataset(ds.id)
99-
assert response == {}
99+
assert response == {"message": "Beginning dataset deletion..."}
100100

101101

102102
def test_dataset_append(dataset):
@@ -204,6 +204,22 @@ def test_dataset_list_autotags(CLIENT, dataset):
204204
assert autotag_response == []
205205

206206

207+
def test_raises_error_for_duplicate():
    """Appending two items with one reference id raises a ValueError.

    The error message is expected to name the repeated id and its count.
    """
    expected_message = (
        "Duplicate reference ids found among dataset_items:"
        " {'duplicate': 'Count: 2'}"
    )
    fake_dataset = Dataset("fake", NucleusClient("fake"))

    with pytest.raises(ValueError) as error:
        fake_dataset.append(
            [DatasetItem("fake", "duplicate") for _ in range(2)]
        )

    assert str(error.value) == expected_message
221+
222+
207223
def test_dataset_export_autotag_scores(CLIENT):
208224
# This test can only run for the test user who has an indexed dataset.
209225
# TODO: if/when we can create autotags via api, create one instead.

tests/test_indexing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def dataset(CLIENT):
3434
yield ds
3535

3636
response = CLIENT.delete_dataset(ds.id)
37-
assert response == {}
37+
assert response == {"message": "Beginning dataset deletion..."}
3838

3939

4040
def test_index_integration(dataset):

tests/test_prediction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def model_run(CLIENT):
6565
yield run
6666

6767
response = CLIENT.delete_dataset(ds.id)
68-
assert response == {}
68+
assert response == {"message": "Beginning dataset deletion..."}
6969
response = CLIENT.delete_model(model.id)
7070
assert response == {}
7171

tests/test_slice.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def dataset(CLIENT):
1616
yield ds
1717

1818
response = CLIENT.delete_dataset(ds.id)
19-
assert response == {}
19+
assert response == {"message": "Beginning dataset deletion..."}
2020

2121

2222
def test_reprs():

0 commit comments

Comments
 (0)