@@ -27,7 +27,7 @@ def big_dataset(dataset: Dataset, image_url):
             "row_data": image_url,
             "external_id": "my-image"
         },
-    ] * 500)
+    ] * 100)
     task.wait_till_done()

     yield dataset
@@ -105,24 +105,28 @@ def test_bulk_delete_datarow_metadata(datarow, mdo):

 def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
     """Delete a single from metadata"""
-    assert not len(datarow.metadata["fields"])
+    n_fields = len(datarow.metadata["fields"])

     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])

-    assert len(datarow.metadata["fields"])
+    assert len(datarow.metadata["fields"]) == (n_fields + 5)

     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
     ])

-    assert len(datarow.metadata["fields"]) == 4
+    assert len(datarow.metadata["fields"]) == (n_fields + 4)


 @pytest.mark.slow
 def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     metadata = []
-    for dr in big_dataset.export_data_rows():
+    n_fields_start = 0
+    for idx, dr in enumerate(big_dataset.export_data_rows()):
+        if idx == 0:
+            n_fields_start = len(dr.metadata["fields"])
+
         metadata.append(
             DataRowMetadata(data_row_id=dr.uid,
                             fields=[
@@ -148,24 +152,26 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
     for dr in big_dataset.export_data_rows():
-        assert len(dr.metadata["fields"]) == 1
+        assert len(dr.metadata["fields"]) == 1 + n_fields_start
         break


 def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
     """test bulk deletes for non non fields"""
-    assert not len(datarow.metadata["fields"])
+    n_fields = len(datarow.metadata["fields"])
     metadata = make_metadata(datarow.uid)
     metadata.fields = [
         m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
     ]
     mdo.bulk_upsert([metadata])
-    assert len(datarow.metadata["fields"])
+    assert len(datarow.metadata["fields"]) == len(
+        set([x.schema_id for x in metadata.fields] +
+            [x['schema_id'] for x in datarow.metadata["fields"]]))

     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
-    assert not len(datarow.metadata["fields"])
+    assert len(datarow.metadata["fields"]) == n_fields


 def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -193,7 +199,6 @@ def test_upsert_non_existent_schema_id(datarow, mdo):


 def test_delete_non_existent_schema_id(datarow, mdo):
-    assert not len(datarow.metadata["fields"])
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid,
                               fields=[EMBEDDING_SCHEMA_ID])
@@ -204,15 +209,18 @@ def test_delete_non_existent_schema_id(datarow, mdo):
 @pytest.mark.slow
 def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
     deletes = []
-    for dr in big_dataset.export_data_rows():
+    n_fields_start = 0
+    for idx, dr in enumerate(big_dataset.export_data_rows()):
+        if idx == 0:
+            n_fields_start = len(dr.metadata["fields"])
         deletes.append(
             DeleteDataRowMetadata(data_row_id=dr.uid,
                                   fields=[EMBEDDING_SCHEMA_ID]))
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0

     for dr in big_dataset.export_data_rows():
-        assert not len(dr.metadata["fields"])
+        assert len(dr.metadata["fields"]) == n_fields_start
         break
