Skip to content

Commit 76be038

Browse files
committed
Added error check for duplicate reference ids
1 parent c4815d3 commit 76be038

File tree

2 files changed

+30
-0
lines changed

2 files changed

+30
-0
lines changed

nucleus/dataset.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections import Counter
12
from typing import List, Dict, Any, Optional
23

34
from nucleus.utils import format_dataset_item_response
@@ -158,6 +159,19 @@ def append(
158159
'ignored_items': int,
159160
}
160161
"""
162+
ref_ids = []
163+
for dataset_item in dataset_items:
164+
if dataset_item.reference_id is not None:
165+
ref_ids.append(dataset_item.reference_id)
166+
if len(ref_ids) != len(set(ref_ids)):
167+
duplicates = {
168+
f"{key}": f"Count: {value}"
169+
for key, value in Counter(ref_ids).items()
170+
}
171+
raise ValueError(
172+
"Duplicate reference ids found among dataset_items: %s"
173+
% duplicates
174+
)
161175
return self._client.populate_dataset(
162176
self.id,
163177
dataset_items,

tests/test_dataset.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,19 @@ def test_dataset_list_autotags(CLIENT, dataset):
127127
# List of Autotags should be empty
128128
autotag_response = CLIENT.list_autotags(dataset.id)
129129
assert autotag_response == []
130+
131+
132+
def test_raises_error_for_duplicate():
    """Check that appending items with a repeated reference id raises.

    A dataset is built from placeholder arguments and two items created
    with identical arguments are appended. The call must raise
    ``ValueError`` and the message must report the repeated id with its
    count.
    """
    expected_message = (
        "Duplicate reference ids found among dataset_items:"
        " {'duplicate': 'Count: 2'}"
    )
    client = NucleusClient("fake")
    dataset_under_test = Dataset("fake", client)

    with pytest.raises(ValueError) as raised:
        # Both items are created with the same arguments; the expected
        # message reports "duplicate" as the id that occurs twice.
        dataset_under_test.append(
            [DatasetItem("fake", "duplicate") for _ in range(2)]
        )

    assert str(raised.value) == expected_message

0 commit comments

Comments
 (0)