 from datetime import datetime
+import time
+from attr import field
 
 import pytest
 
@@ -34,6 +36,17 @@ def big_dataset(dataset: Dataset, image_url):
     dataset.delete()
 
 
+def wait_for_embeddings_svc(data_row_ids, mdo):
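+    # Poll the metadata export until every requested data row reports at least one
+    # field, backing off quadratically (1, 4, 9, ... seconds); give up after five tries.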
+    for idx in range(5):
+        if all([
+                len(metadata.fields)
+                for metadata in mdo.bulk_export(data_row_ids)
+        ]):
+            return
+        time.sleep((idx + 1)**2)
+    raise Exception("Embedding svc failed to update metadata.")
+
+
 def make_metadata(dr_id) -> DataRowMetadata:
     embeddings = [0.0] * 128
     msg = "A message"
@@ -59,25 +72,21 @@ def test_get_datarow_metadata_ontology(mdo):
 
 
 def test_bulk_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
+    wait_for_embeddings_svc([datarow.uid], mdo)
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
     assert len(mdo.bulk_export([datarow.uid]))
-    assert len(mdo.bulk_export([datarow.uid])[0].fields)
-
-
-def test_parse_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
-    metadata = make_metadata(datarow.uid)
-    mdo.bulk_upsert([metadata])
-    assert mdo.bulk_export([datarow.uid])
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == 5
 
 
 @pytest.mark.slow
 def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
     metadata = []
     data_row_ids = []
-    for dr in big_dataset.data_rows():
-        metadata.append(make_metadata(dr.uid))
-        data_row_ids.append(dr.uid)
+    data_row_ids = [dr.uid for dr in big_dataset.data_rows()]
+    wait_for_embeddings_svc(data_row_ids, mdo)
+    for data_row_id in data_row_ids:
+        metadata.append(make_metadata(data_row_id))
     errors = mdo.bulk_upsert(metadata)
     assert len(errors) == 0
 
@@ -120,10 +129,13 @@ def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
 
 
 def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
+
     metadata = []
-    for dr in big_dataset.data_rows():
+    data_row_ids = [dr.uid for dr in big_dataset.data_rows()]
+    wait_for_embeddings_svc(data_row_ids, mdo)
+    for data_row_id in data_row_ids:
         metadata.append(
-            DataRowMetadata(data_row_id=dr.uid,
+            DataRowMetadata(data_row_id=data_row_id,
                             fields=[
                                 DataRowMetadataField(
                                     schema_id=EMBEDDING_SCHEMA_ID,
@@ -135,25 +147,26 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     assert len(errors) == 0
 
     deletes = []
-    for dr in big_dataset.data_rows():
+    for data_row_id in data_row_ids:
         deletes.append(
             DeleteDataRowMetadata(
-                data_row_id=dr.uid,
+                data_row_id=data_row_id,
                 fields=[
                     EMBEDDING_SCHEMA_ID,  #
                     CAPTURE_DT_SCHEMA_ID
                 ]))
-
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
-    for dr in big_dataset.data_rows():
-        # 1 remaining because only the embeddings id overlaps
-        assert len(mdo.bulk_export([dr.uid])[0].fields) == 1
+    for data_row_id in data_row_ids:
+        # 2 fields remain: we delete the user-provided embedding, but the text and Labelbox-generated embedding fields still exist
+        fields = mdo.bulk_export([data_row_id])[0].fields
+        assert len(fields) == 2
+        assert EMBEDDING_SCHEMA_ID not in [field.schema_id for field in fields]
 
 
 def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
"""test bulk deletes for non non fields"""
-    n_fields = len(mdo.bulk_export([datarow.uid])[0].fields)
+    wait_for_embeddings_svc([datarow.uid], mdo)
     metadata = make_metadata(datarow.uid)
     metadata.fields = [
         m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
@@ -167,7 +180,7 @@ def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
-    assert len(mdo.bulk_export([datarow.uid])[0].fields) == n_fields
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == 1
 
 
 def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -202,24 +215,6 @@ def test_delete_non_existent_schema_id(datarow, mdo):
     # No message is returned
 
 
-@pytest.mark.slow
-def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
-    deletes = []
-    n_fields_start = 0
-    for idx, dr in enumerate(big_dataset.data_rows()):
-        if idx == 0:
-            n_fields_start = len(mdo.bulk_export([dr.uid])[0].fields)
-        deletes.append(
-            DeleteDataRowMetadata(data_row_id=dr.uid,
-                                  fields=[EMBEDDING_SCHEMA_ID]))
-    errors = mdo.bulk_delete(deletes)
-    assert len(errors) == 0
-
-    for dr in big_dataset.export_data_rows():
-        assert len(mdo.bulk_export([dr.uid])[0].fields) == n_fields_start
-        break
-
-
 def test_parse_raw_metadata(mdo):
     example = {
         'dataRowId':