@@ -58,76 +58,70 @@ def test_get_datarow_metadata_ontology(mdo):
     assert len(mdo.custom_fields) == 0
 
 
-def test_get_datarow_metadata(datarow):
-    """No metadata"""
-    md = datarow.metadata
-    assert len(md)
-
-
 def test_bulk_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
-    n_fields = len(datarow.metadata["fields"])
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
-    assert len(datarow.metadata["fields"]) > n_fields
+    assert len(mdo.bulk_export([datarow.uid]))
+    assert len(mdo.bulk_export([datarow.uid])[0].fields)
 
 
 def test_parse_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
-    assert mdo.parse_metadata([datarow.metadata])
+    assert mdo.bulk_export([datarow.uid])
 
 
 @pytest.mark.slow
 def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
     metadata = []
-    for dr in big_dataset.export_data_rows():
+    data_row_ids = []
+    for dr in big_dataset.data_rows():
         metadata.append(make_metadata(dr.uid))
+        data_row_ids.append(dr.uid)
     errors = mdo.bulk_upsert(metadata)
     assert len(errors) == 0
 
-    for dr in big_dataset.export_data_rows():
-        assert len(dr.metadata["fields"])
-        break
+    metadata_lookup = {
+        metadata.data_row_id: metadata
+        for metadata in mdo.bulk_export(data_row_ids)
+    }
+    for data_row_id in data_row_ids:
+        assert len(metadata_lookup.get(data_row_id).fields)
 
 
 def test_bulk_delete_datarow_metadata(datarow, mdo):
     """test bulk deletes for all fields"""
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
 
-    assert len(datarow.metadata["fields"])
-    upload_ids = [m.schema_id for m in metadata.fields]
+    assert len(mdo.bulk_export([datarow.uid])[0].fields)
+    upload_ids = [m.schema_id for m in metadata.fields[:-2]]
     mdo.bulk_delete(
         [DeleteDataRowMetadata(data_row_id=datarow.uid, fields=upload_ids)])
-    remaining_ids = set([f['schema_id'] for f in datarow.metadata["fields"]])
+    remaining_ids = set(
+        [f.schema_id for f in mdo.bulk_export([datarow.uid])[0].fields])
     assert not len(remaining_ids.intersection(set(upload_ids)))
 
 
-@pytest.mark.skip
 def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
"""Delete a single from metadata"""
-    n_fields = len(datarow.metadata["fields"])
-
+    n_fields = len(mdo.bulk_export([datarow.uid])[0].fields)
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
 
-    assert len(datarow.metadata["fields"]) == (n_fields + 5)
+    assert len(mdo.bulk_export(
+        [datarow.uid])[0].fields) == (n_fields + len(metadata.fields))
 
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
     ])
+    assert len(mdo.bulk_export(
+        [datarow.uid])[0].fields) == (n_fields + len(metadata.fields) - 1)
 
-    assert len(datarow.metadata["fields"]) == (n_fields + 4)
 
-
-@pytest.mark.skip
 def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     metadata = []
-    n_fields_start = 0
-    for idx, dr in enumerate(big_dataset.export_data_rows()):
-        if idx == 0:
-            n_fields_start = len(dr.metadata["fields"])
-
+    for dr in big_dataset.data_rows():
         metadata.append(
             DataRowMetadata(data_row_id=dr.uid,
                             fields=[
@@ -153,27 +147,27 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
     for dr in big_dataset.data_rows():
-        assert len(dr.metadata["fields"]) == n_fields_start
-        break
+        # 1 remaining because only the embeddings id overlaps
+        assert len(mdo.bulk_export([dr.uid])[0].fields) == 1
 
 
-@pytest.mark.skip
 def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
"""test bulk deletes for non non fields"""
-    n_fields = len(datarow.metadata["fields"])
+    n_fields = len(mdo.bulk_export([datarow.uid])[0].fields)
     metadata = make_metadata(datarow.uid)
     metadata.fields = [
         m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
     ]
     mdo.bulk_upsert([metadata])
-    assert len(datarow.metadata["fields"]) == len(
+
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == len(
         set([x.schema_id for x in metadata.fields] +
-            [x['schema_id'] for x in datarow.metadata["fields"]]))
+            [x.schema_id for x in mdo.bulk_export([datarow.uid])[0].fields]))
 
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
-    assert len(datarow.metadata["fields"]) == n_fields
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == n_fields
 
 
 def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -209,45 +203,44 @@ def test_delete_non_existent_schema_id(datarow, mdo):
 
 
 @pytest.mark.slow
-@pytest.mark.skip("Test is inconsistent.")
 def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
     deletes = []
     n_fields_start = 0
-    for idx, dr in enumerate(big_dataset.export_data_rows()):
+    for idx, dr in enumerate(big_dataset.data_rows()):
         if idx == 0:
-            n_fields_start = len(dr.metadata["fields"])
+            n_fields_start = len(mdo.bulk_export([dr.uid])[0].fields)
         deletes.append(
             DeleteDataRowMetadata(data_row_id=dr.uid,
                                   fields=[EMBEDDING_SCHEMA_ID]))
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
 
     for dr in big_dataset.export_data_rows():
-        assert len(dr.metadata["fields"]) == n_fields_start
+        assert len(mdo.bulk_export([dr.uid])[0].fields) == n_fields_start
         break
 
 
 def test_parse_raw_metadata(mdo):
     example = {
-        'data_row_id':
+        'dataRowId':
             'ckr6kkfx801ui0yrtg9fje8xh',
         'fields': [{
-            'schema_id': 'cko8s9r5v0001h2dk9elqdidh',
+            'schemaId': 'cko8s9r5v0001h2dk9elqdidh',
             'value': 'my-new-message'
         }, {
-            'schema_id': 'cko8sbczn0002h2dkdaxb5kal',
+            'schemaId': 'cko8sbczn0002h2dkdaxb5kal',
             'value': {}
         }, {
-            'schema_id': 'cko8sbscr0003h2dk04w86hof',
+            'schemaId': 'cko8sbscr0003h2dk04w86hof',
             'value': {}
         }, {
-            'schema_id': 'cko8sdzv70006h2dk8jg64zvb',
+            'schemaId': 'cko8sdzv70006h2dk8jg64zvb',
             'value': '2021-07-20T21:41:14.606710Z'
         }]
     }
 
     parsed = mdo.parse_metadata([example])
     assert len(parsed) == 1
     row = parsed[0]
-    assert row.data_row_id == example["data_row_id"]
+    assert row.data_row_id == example["dataRowId"]
     assert len(row.fields) == 3
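For reference, DataRowMetadataOntology.bulk_export reads metadata back for a list of data row ids, which is the read path these tests rely on instead of the cached datarow.metadata dict. A minimal sketch, assuming a LABELBOX_API_KEY in the environment and an existing data row (the id below is a placeholder, not a real value):

from labelbox import Client

# Assumes LABELBOX_API_KEY is set; "<data-row-id>" is a placeholder for a real data row id.
client = Client()
mdo = client.get_data_row_metadata_ontology()

exported = mdo.bulk_export(["<data-row-id>"])  # one DataRowMetadata entry per requested id
for field in exported[0].fields:
    print(field.schema_id, field.value)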