File tree Expand file tree Collapse file tree 2 files changed +30
-0
lines changed Expand file tree Collapse file tree 2 files changed +30
-0
lines changed Original file line number Diff line number Diff line change
1
+ from collections import Counter
1
2
from typing import List , Dict , Any , Optional
2
3
3
4
from nucleus .utils import format_dataset_item_response
@@ -158,6 +159,19 @@ def append(
158
159
'ignored_items': int,
159
160
}
160
161
"""
162
# Reject the upload before contacting the backend when two or more items
# share a reference id. Items whose reference_id is None are exempt from
# the uniqueness check.
ref_id_counts = Counter(
    dataset_item.reference_id
    for dataset_item in dataset_items
    if dataset_item.reference_id is not None
)
# Report only the ids that actually repeat; listing every id with
# "Count: 1" would bury the offending entries in the error message.
duplicates = {
    f"{key}": f"Count: {value}"
    for key, value in ref_id_counts.items()
    if value > 1
}
if duplicates:
    raise ValueError(
        f"Duplicate reference ids found among dataset_items: {duplicates}"
    )
161
175
return self ._client .populate_dataset (
162
176
self .id ,
163
177
dataset_items ,
Original file line number Diff line number Diff line change @@ -127,3 +127,19 @@ def test_dataset_list_autotags(CLIENT, dataset):
127
127
# List of Autotags should be empty
128
128
autotag_response = CLIENT .list_autotags (dataset .id )
129
129
assert autotag_response == []
130
+
131
+
132
def test_raises_error_for_duplicate():
    """Check that Dataset.append rejects items sharing a reference id.

    Two DatasetItems are built with identical arguments; the second
    positional argument is presumably the reference id (the expected
    message reports 'duplicate' as the repeated id). The call must raise
    ValueError with a message naming the id and its count.
    """
    dataset_under_test = Dataset("fake", NucleusClient("fake"))
    expected_message = (
        "Duplicate reference ids found among dataset_items:"
        " {'duplicate': 'Count: 2'}"
    )

    with pytest.raises(ValueError) as error:
        colliding_items = [
            DatasetItem("fake", "duplicate"),
            DatasetItem("fake", "duplicate"),
        ]
        dataset_under_test.append(colliding_items)

    actual_message = str(error.value)
    assert actual_message == expected_message
You can’t perform that action at this time.
0 commit comments