
Commit dd046bc

Matt Sokoloff committed
fix metadata tests
1 parent 33554c2 commit dd046bc

File tree

1 file changed: +33 -38 lines changed


tests/integration/test_data_row_metadata.py

Lines changed: 33 additions & 38 deletions
@@ -1,4 +1,6 @@
 from datetime import datetime
+import time
+from attr import field
 
 import pytest
 

@@ -34,6 +36,17 @@ def big_dataset(dataset: Dataset, image_url):
     dataset.delete()
 
 
+def wait_for_embeddings_svc(data_row_ids, mdo):
+    for idx in range(5):
+        if all([
+                len(metadata.fields)
+                for metadata in mdo.bulk_export(data_row_ids)
+        ]):
+            return
+        time.sleep((idx + 1)**2)
+    raise Exception("Embedding svc failed to update metadata.")
+
+
 def make_metadata(dr_id) -> DataRowMetadata:
     embeddings = [0.0] * 128
     msg = "A message"
@@ -59,25 +72,21 @@ def test_get_datarow_metadata_ontology(mdo):
 
 
 def test_bulk_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
+    wait_for_embeddings_svc([datarow.uid], mdo)
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
     assert len(mdo.bulk_export([datarow.uid]))
-    assert len(mdo.bulk_export([datarow.uid])[0].fields)
-
-
-def test_parse_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
-    metadata = make_metadata(datarow.uid)
-    mdo.bulk_upsert([metadata])
-    assert mdo.bulk_export([datarow.uid])
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == 5
 
 
 @pytest.mark.slow
 def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
     metadata = []
     data_row_ids = []
-    for dr in big_dataset.data_rows():
-        metadata.append(make_metadata(dr.uid))
-        data_row_ids.append(dr.uid)
+    data_row_ids = [dr.uid for dr in big_dataset.data_rows()]
+    wait_for_embeddings_svc(data_row_ids, mdo)
+    for data_row_id in data_row_ids:
+        metadata.append(make_metadata(data_row_id))
     errors = mdo.bulk_upsert(metadata)
     assert len(errors) == 0
 

@@ -120,10 +129,13 @@ def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
 
 
 def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
+
     metadata = []
-    for dr in big_dataset.data_rows():
+    data_row_ids = [dr.uid for dr in big_dataset.data_rows()]
+    wait_for_embeddings_svc(data_row_ids, mdo)
+    for data_row_id in data_row_ids:
         metadata.append(
-            DataRowMetadata(data_row_id=dr.uid,
+            DataRowMetadata(data_row_id=data_row_id,
                             fields=[
                                 DataRowMetadataField(
                                     schema_id=EMBEDDING_SCHEMA_ID,
@@ -135,25 +147,26 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     assert len(errors) == 0
 
     deletes = []
-    for dr in big_dataset.data_rows():
+    for data_row_id in data_row_ids:
         deletes.append(
             DeleteDataRowMetadata(
-                data_row_id=dr.uid,
+                data_row_id=data_row_id,
                 fields=[
                     EMBEDDING_SCHEMA_ID,  #
                     CAPTURE_DT_SCHEMA_ID
                 ]))
-
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
-    for dr in big_dataset.data_rows():
-        # 1 remaining because only the embeddings id overlaps
-        assert len(mdo.bulk_export([dr.uid])[0].fields) == 1
+    for data_row_id in data_row_ids:
+        # 2 remaining because we delete the user provided embedding but text and labelbox generated embeddings still exist
+        fields = mdo.bulk_export([data_row_id])[0].fields
+        assert len(fields) == 2
+        assert EMBEDDING_SCHEMA_ID not in [field.schema_id for field in fields]
 
 
 def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
     """test bulk deletes for non non fields"""
-    n_fields = len(mdo.bulk_export([datarow.uid])[0].fields)
+    wait_for_embeddings_svc([datarow.uid], mdo)
     metadata = make_metadata(datarow.uid)
     metadata.fields = [
         m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
@@ -167,7 +180,7 @@ def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
-    assert len(mdo.bulk_export([datarow.uid])[0].fields) == n_fields
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == 1
 
 
 def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -202,24 +215,6 @@ def test_delete_non_existent_schema_id(datarow, mdo):
     # No message is returned
 
 
-@pytest.mark.slow
-def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
-    deletes = []
-    n_fields_start = 0
-    for idx, dr in enumerate(big_dataset.data_rows()):
-        if idx == 0:
-            n_fields_start = len(mdo.bulk_export([dr.uid])[0].fields)
-        deletes.append(
-            DeleteDataRowMetadata(data_row_id=dr.uid,
-                                  fields=[EMBEDDING_SCHEMA_ID]))
-    errors = mdo.bulk_delete(deletes)
-    assert len(errors) == 0
-
-    for dr in big_dataset.export_data_rows():
-        assert len(mdo.bulk_export([dr.uid])[0].fields) == n_fields_start
-        break
-
-
 def test_parse_raw_metadata(mdo):
     example = {
         'dataRowId':
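A note on the reworked delete test above: instead of asserting a bare field count, it now also checks that the deleted schema id is absent from the bulk_export result, because service-generated fields (text plus the Labelbox-generated embedding) survive the delete of the user-provided embedding. A standalone sketch of that membership check, with SimpleNamespace stand-ins for the exported metadata and illustrative placeholder schema ids (none of these values come from the commit):

from types import SimpleNamespace

EMBEDDING_SCHEMA_ID = "user-embedding-schema-id"  # illustrative placeholder

# Stand-in for one DataRowMetadata record exported after the bulk delete:
# two service-generated fields remain; the user-provided embedding is gone.
exported = SimpleNamespace(fields=[
    SimpleNamespace(schema_id="text-schema-id"),
    SimpleNamespace(schema_id="labelbox-embedding-schema-id"),
])

assert len(exported.fields) == 2
assert EMBEDDING_SCHEMA_ID not in [f.schema_id for f in exported.fields]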
