Commit 41e93ca

Merge pull request #65 from scaleapi/da/validation_serialization
Add error check for duplicate reference ids
2 parents ec69608 + 9d38e59 commit 41e93ca

File tree: 6 files changed (+37, -7 lines)

nucleus/dataset.py

Lines changed: 15 additions & 1 deletion
@@ -1,4 +1,5 @@
-from typing import Any, Dict, List, Optional
+from collections import Counter
+from typing import List, Dict, Any, Optional
 
 import requests
 
@@ -178,6 +179,19 @@ def append(
             'ignored_items': int,
         }
         """
+        ref_ids = []
+        for dataset_item in dataset_items:
+            if dataset_item.reference_id is not None:
+                ref_ids.append(dataset_item.reference_id)
+        if len(ref_ids) != len(set(ref_ids)):
+            duplicates = {
+                f"{key}": f"Count: {value}"
+                for key, value in Counter(ref_ids).items()
+            }
+            raise ValueError(
+                "Duplicate reference ids found among dataset_items: %s"
+                % duplicates
+            )
         return self._client.populate_dataset(
             self.id,
             dataset_items,
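For readers skimming the diff, here is a minimal standalone sketch of the same Counter-based duplicate check; the helper name and the plain-string ids are illustrative, not part of the commit.

# Sketch of the duplicate check added to Dataset.append, isolated from the client.
from collections import Counter

def check_duplicate_reference_ids(ref_ids):
    # Mirrors the committed logic: if any id repeats, report every id with its count.
    if len(ref_ids) != len(set(ref_ids)):
        duplicates = {
            key: f"Count: {value}" for key, value in Counter(ref_ids).items()
        }
        raise ValueError(
            "Duplicate reference ids found among dataset_items: %s" % duplicates
        )

check_duplicate_reference_ids(["a", "b"])      # ok, no duplicates
# check_duplicate_reference_ids(["a", "a"])    # raises ValueError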

tests/test_annotation.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ def dataset(CLIENT):
     yield ds
 
     response = CLIENT.delete_dataset(ds.id)
-    assert response == {}
+    assert response == {"message": "Beginning dataset deletion..."}
 
 
 def test_box_gt_upload(dataset):

tests/test_dataset.py

Lines changed: 18 additions & 2 deletions
@@ -58,7 +58,7 @@ def dataset(CLIENT):
     yield ds
 
     response = CLIENT.delete_dataset(ds.id)
-    assert response == {}
+    assert response == {"message": "Beginning dataset deletion..."}
 
 
 def test_dataset_create_and_delete(CLIENT):
@@ -73,7 +73,7 @@ def test_dataset_create_and_delete(CLIENT):
 
     # Deletion
     response = CLIENT.delete_dataset(ds.id)
-    assert response == {}
+    assert response == {"message": "Beginning dataset deletion..."}
 
 
 def test_dataset_append(dataset):
@@ -138,6 +138,22 @@ def test_dataset_list_autotags(CLIENT, dataset):
     assert autotag_response == []
 
 
+def test_raises_error_for_duplicate():
+    fake_dataset = Dataset("fake", NucleusClient("fake"))
+    with pytest.raises(ValueError) as error:
+        fake_dataset.append(
+            [
+                DatasetItem("fake", "duplicate"),
+                DatasetItem("fake", "duplicate"),
+            ]
+        )
+    assert (
+        str(error.value)
+        == "Duplicate reference ids found among dataset_items:"
+        " {'duplicate': 'Count: 2'}"
+    )
+
+
 def test_dataset_export_autotag_scores(CLIENT):
     # This test can only run for the test user who has an indexed dataset.
     # TODO: if/when we can create autotags via api, create one instead.
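The asserted message relies on Python's %-formatting of a dict; a quick standalone check (not part of the commit) reproduces the exact string the new test expects.

# Sketch: the message Dataset.append raises for two items sharing the
# reference id "duplicate", built via dict repr interpolation.
duplicates = {"duplicate": "Count: 2"}
message = "Duplicate reference ids found among dataset_items: %s" % duplicates
assert message == (
    "Duplicate reference ids found among dataset_items: {'duplicate': 'Count: 2'}"
)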

tests/test_indexing.py

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ def dataset(CLIENT):
     yield ds
 
     response = CLIENT.delete_dataset(ds.id)
-    assert response == {}
+    assert response == {"message": "Beginning dataset deletion..."}
 
 
 def test_index_integration(dataset):

tests/test_prediction.py

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ def model_run(CLIENT):
     yield run
 
     response = CLIENT.delete_dataset(ds.id)
-    assert response == {}
+    assert response == {"message": "Beginning dataset deletion..."}
     response = CLIENT.delete_model(model.id)
     assert response == {}
 
tests/test_slice.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ def dataset(CLIENT):
     yield ds
 
     response = CLIENT.delete_dataset(ds.id)
-    assert response == {}
+    assert response == {"message": "Beginning dataset deletion..."}
 
 
 def test_reprs():
