from labelbox.schema.data_row_metadata import DataRowMetadataField, DataRowMetadata, DeleteDataRowMetadata, \
    DataRowMetadataOntology

+INVALID_SCHEMA_ID = "1" * 25
FAKE_SCHEMA_ID = "0" * 25
FAKE_DATAROW_ID = "D" * 25
SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
+PRE_COMPUTED_EMBEDDINGS_ID = 'ckrzang79000008l6hb5s6za1'

FAKE_NUMBER_FIELD = {
    "id": FAKE_SCHEMA_ID,
@@ -40,24 +42,13 @@ def big_dataset(dataset: Dataset, image_url):
            "row_data": image_url,
            "external_id": "my-image"
        },
-    ] * 250)
+    ] * 5)
    task.wait_till_done()

    yield dataset
    dataset.delete()


-def wait_for_embeddings_svc(data_row_ids, mdo):
-    for idx in range(5):
-        if all([
-                len(metadata.fields)
-                for metadata in mdo.bulk_export(data_row_ids)
-        ]):
-            return
-        time.sleep((idx + 1)**2)
-    raise Exception("Embedding svc failed to update metadata.")
-
-
def make_metadata(dr_id) -> DataRowMetadata:
    embeddings = [0.0] * 128
    msg = "A message"
@@ -97,18 +88,20 @@ def test_get_datarow_metadata_ontology(mdo):


def test_bulk_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
-    wait_for_embeddings_svc([datarow.uid], mdo)
    metadata = make_metadata(datarow.uid)
    mdo.bulk_upsert([metadata])
-    assert len(mdo.bulk_export([datarow.uid]))
-    assert len(mdo.bulk_export([datarow.uid])[0].fields) == 5
+    exported = mdo.bulk_export([datarow.uid])
+    assert len(exported)
+    assert len([
+        field for field in exported[0].fields
+        if field.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
+    ]) == 4


@pytest.mark.slow
def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
    metadata = []
    data_row_ids = [dr.uid for dr in big_dataset.data_rows()]
-    wait_for_embeddings_svc(data_row_ids, mdo)
    for data_row_id in data_row_ids:
        metadata.append(make_metadata(data_row_id))
    errors = mdo.bulk_upsert(metadata)
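As the assertions in these tests suggest, bulk_upsert returns a list of per-row errors rather than raising on partial failure, so callers check that the list is empty:

errors = mdo.bulk_upsert(metadata)
assert len(errors) == 0, errors  # every data row should upsert cleanly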
@@ -119,14 +112,16 @@ def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
        for metadata in mdo.bulk_export(data_row_ids)
    }
    for data_row_id in data_row_ids:
-        assert len(metadata_lookup.get(data_row_id).fields)
+        assert len([
+            f for f in metadata_lookup.get(data_row_id).fields
+            if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
+        ]), metadata_lookup.get(data_row_id).fields


def test_bulk_delete_datarow_metadata(datarow, mdo):
    """test bulk deletes for all fields"""
    metadata = make_metadata(datarow.uid)
    mdo.bulk_upsert([metadata])
-
    assert len(mdo.bulk_export([datarow.uid])[0].fields)
    upload_ids = [m.schema_id for m in metadata.fields[:-2]]
    mdo.bulk_delete(
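Deletion mirrors the upsert path: bulk_delete takes DeleteDataRowMetadata objects naming the schema ids to remove from each data row. A minimal usage sketch, with datarow and mdo as provided by the test fixtures:

mdo.bulk_delete([
    # Remove only the text field; other fields on the row are untouched.
    DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
])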
@@ -155,7 +150,6 @@ def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
    metadata = []
    data_row_ids = [dr.uid for dr in big_dataset.data_rows()]
-    wait_for_embeddings_svc(data_row_ids, mdo)
    for data_row_id in data_row_ids:
        metadata.append(
            DataRowMetadata(data_row_id=data_row_id,
@@ -181,29 +175,33 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
    errors = mdo.bulk_delete(deletes)
    assert len(errors) == 0
    for data_row_id in data_row_ids:
-        # 2 remaining because we delete the user provided embedding but text and labelbox generated embeddings still exist
-        fields = mdo.bulk_export([data_row_id])[0].fields
-        assert len(fields) == 2
+        fields = [
+            f for f in mdo.bulk_export([data_row_id])[0].fields
+            if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
+        ]
+        assert len(fields) == 1, fields
    assert EMBEDDING_SCHEMA_ID not in [field.schema_id for field in fields]


def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
    """test bulk deletes for enum fields"""
-    wait_for_embeddings_svc([datarow.uid], mdo)
    metadata = make_metadata(datarow.uid)
    metadata.fields = [
        m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
    ]
    mdo.bulk_upsert([metadata])

-    assert len(mdo.bulk_export([datarow.uid])[0].fields) == len(
+    exported = mdo.bulk_export([datarow.uid])[0].fields
+    assert len(exported) == len(
        set([x.schema_id for x in metadata.fields] +
-            [x.schema_id for x in mdo.bulk_export([datarow.uid])[0].fields]))
+            [x.schema_id for x in exported]))

    mdo.bulk_delete([
        DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
    ])
-    assert len(mdo.bulk_export([datarow.uid])[0].fields) == 1
+    exported = mdo.bulk_export([datarow.uid])[0].fields
+    assert len(
+        [f for f in exported if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID]) == 0


def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -223,7 +221,7 @@ def test_upsert_non_existent_schema_id(datarow, mdo):
    metadata = DataRowMetadata(data_row_id=datarow.uid,
                               fields=[
                                   DataRowMetadataField(
-                                       schema_id=FAKE_SCHEMA_ID,
+                                       schema_id=INVALID_SCHEMA_ID,
                                       value="message"),
                               ])
    with pytest.raises(ValueError):