Commit 1500a1a

Matt Sokoloff committed
added bulk export metadata
1 parent 0736823 commit 1500a1a

File tree: 3 files changed (+79 -77 lines)


labelbox/schema/data_row.py

Lines changed: 0 additions & 29 deletions
@@ -20,14 +20,12 @@ class DataRow(DbObject, Updateable, BulkDeletable):
         updated_at (datetime)
         created_at (datetime)
         media_attributes (dict): generated media attributes for the datarow
-        metadata (dict): uploaded metadata
 
         dataset (Relationship): `ToOne` relationship to Dataset
         created_by (Relationship): `ToOne` relationship to User
         organization (Relationship): `ToOne` relationship to Organization
         labels (Relationship): `ToMany` relationship to Label
         attachments (Relationship) `ToMany` relationship with AssetAttachment
-        metadata (Relationship): This Relationship is Deprecated. Please use `DataRow.attachments()` instead
     """
     external_id = Field.String("external_id")
     row_data = Field.String("row_data")
@@ -50,33 +48,6 @@ def __init__(self, *args, **kwargs):
         self.attachments.supports_filtering = False
         self.attachments.supports_sorting = False
 
-    @property
-    def metadata(self) -> Dict[str, Union[str, List[Dict]]]:
-        """Get metadata for datarow
-        """
-
-        query = """query GetDataRowMetadataBetaPyApi($dataRowID: ID!) {
-            dataRow(where: {id: $dataRowID}) {
-                customMetadata {
-                    value
-                    schemaId
-                }
-            }
-        }
-        """
-
-        metadata = self.client.execute(
-            query, {"dataRowID": self.uid})["dataRow"]["customMetadata"]
-
-        return {
-            "data_row_id":
-                self.uid,
-            "fields": [{
-                "schema_id": m["schemaId"],
-                "value": m["value"]
-            } for m in metadata]
-        }
-
     @staticmethod
     def bulk_delete(data_rows):
         """ Deletes all the given DataRows.

labelbox/schema/data_row_metadata.py

Lines changed: 41 additions & 3 deletions
@@ -64,7 +64,7 @@ class DeleteDataRowMetadata(_CamelCaseMixin):
 
 class DataRowMetadataBatchResponse(_CamelCaseMixin):
     data_row_id: str
-    error: str
+    error: Optional[str] = None
     fields: List[Union[DataRowMetadataField, SchemaId]]
 
 
@@ -200,7 +200,7 @@ def parse_metadata(
         for dr in unparsed:
             fields = []
             for f in dr["fields"]:
-                schema = self.all_fields_id_index[f["schema_id"]]
+                schema = self.all_fields_id_index[f["schemaId"]]
                 if schema.kind == DataRowMetadataKind.enum:
                     continue
                 elif schema.kind == DataRowMetadataKind.option:
@@ -212,7 +212,7 @@ def parse_metadata(
 
                 fields.append(field)
             parsed.append(
-                DataRowMetadata(data_row_id=dr["data_row_id"], fields=fields))
+                DataRowMetadata(data_row_id=dr["dataRowId"], fields=fields))
         return parsed
 
     def bulk_upsert(
@@ -330,6 +330,43 @@ def _batch_delete(
                                  items,
                                  batch_size=self._batch_size)
 
+    def bulk_export(self, data_row_ids: List[str]) -> List[DataRowMetadata]:
+        """ Exports metadata for a list of data rows.
+
+        >>> mdo.bulk_export([data_row.uid for data_row in data_rows])
+
+        Args:
+            data_row_ids: List of data row ids to fetch metadata for
+
+        Returns:
+            A list of DataRowMetadata for the requested data rows.
+
+        """
+
+        if not len(data_row_ids):
+            raise ValueError("Empty list passed")
+
+        def _bulk_export(
+            data_row_ids: List[str]
+        ) -> List[DataRowMetadata]:
+            query = """query dataRowCustomMetadataPyApi($dataRowIds: [ID!]!) {
+                dataRowCustomMetadata(where: {dataRowIds : $dataRowIds}) {
+                    dataRowId
+                    fields {
+                        value
+                        schemaId
+                    }
+                }
+            }
+            """
+            return self.parse_metadata(
+                self.client.execute(
+                    query, {"dataRowIds": data_row_ids})['dataRowCustomMetadata'])
+
+        return _batch_operations(_bulk_export,
+                                 data_row_ids,
+                                 batch_size=self._batch_size)
+
     def _parse_upsert(
         self, metadatum: DataRowMetadataField
     ) -> List[_UpsertDataRowMetadataInput]:
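
For context, a minimal usage sketch of the new `bulk_export` method (not part of this commit). It assumes a configured `Client` and existing data rows; the API key and data row ids below are placeholders, and `get_data_row_metadata_ontology()` is assumed to be the client accessor that returns the `DataRowMetadataOntology` instance (the `mdo` fixture used in the tests below).

from labelbox import Client

# Hedged usage sketch: the API key and data row ids are placeholders,
# and get_data_row_metadata_ontology() is assumed to return the
# DataRowMetadataOntology instance that exposes bulk_export().
client = Client(api_key="<YOUR_API_KEY>")
mdo = client.get_data_row_metadata_ontology()

data_row_ids = ["<data_row_uid_1>", "<data_row_uid_2>"]
for dr_metadata in mdo.bulk_export(data_row_ids):
    # Each entry is a DataRowMetadata with a data_row_id and a list of fields.
    print(dr_metadata.data_row_id)
    for field in dr_metadata.fields:
        print("  ", field.schema_id, field.value)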

tests/integration/test_data_row_metadata.py

Lines changed: 38 additions & 45 deletions
@@ -58,76 +58,70 @@ def test_get_datarow_metadata_ontology(mdo):
     assert len(mdo.custom_fields) == 0
 
 
-def test_get_datarow_metadata(datarow):
-    """No metadata"""
-    md = datarow.metadata
-    assert len(md)
-
-
 def test_bulk_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
-    n_fields = len(datarow.metadata["fields"])
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
-    assert len(datarow.metadata["fields"]) > n_fields
+    assert len(mdo.bulk_export([datarow.uid]))
+    assert len(mdo.bulk_export([datarow.uid])[0].fields)
 
 
 def test_parse_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
-    assert mdo.parse_metadata([datarow.metadata])
+    assert mdo.bulk_export([datarow.uid])
 
 
 @pytest.mark.slow
 def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
     metadata = []
-    for dr in big_dataset.export_data_rows():
+    data_row_ids = []
+    for dr in big_dataset.data_rows():
         metadata.append(make_metadata(dr.uid))
+        data_row_ids.append(dr.uid)
     errors = mdo.bulk_upsert(metadata)
     assert len(errors) == 0
 
-    for dr in big_dataset.export_data_rows():
-        assert len(dr.metadata["fields"])
-        break
+    metadata_lookup = {
+        metadata.data_row_id: metadata
+        for metadata in mdo.bulk_export(data_row_ids)
+    }
+    for data_row_id in data_row_ids:
+        assert len(metadata_lookup.get(data_row_id).fields)
 
 
 def test_bulk_delete_datarow_metadata(datarow, mdo):
     """test bulk deletes for all fields"""
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
 
-    assert len(datarow.metadata["fields"])
-    upload_ids = [m.schema_id for m in metadata.fields]
+    assert len(mdo.bulk_export([datarow.uid])[0].fields)
+    upload_ids = [m.schema_id for m in metadata.fields[:-2]]
     mdo.bulk_delete(
         [DeleteDataRowMetadata(data_row_id=datarow.uid, fields=upload_ids)])
-    remaining_ids = set([f['schema_id'] for f in datarow.metadata["fields"]])
+    remaining_ids = set(
+        [f.schema_id for f in mdo.bulk_export([datarow.uid])[0].fields])
     assert not len(remaining_ids.intersection(set(upload_ids)))
 
 
-@pytest.mark.skip
 def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
     """Delete a single from metadata"""
-    n_fields = len(datarow.metadata["fields"])
-
+    n_fields = len(mdo.bulk_export([datarow.uid])[0].fields)
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
 
-    assert len(datarow.metadata["fields"]) == (n_fields + 5)
+    assert len(mdo.bulk_export(
+        [datarow.uid])[0].fields) == (n_fields + len(metadata.fields))
 
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
     ])
+    assert len(mdo.bulk_export(
+        [datarow.uid])[0].fields) == (n_fields + len(metadata.fields) - 1)
 
-    assert len(datarow.metadata["fields"]) == (n_fields + 4)
 
-
-@pytest.mark.skip
 def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     metadata = []
-    n_fields_start = 0
-    for idx, dr in enumerate(big_dataset.export_data_rows()):
-        if idx == 0:
-            n_fields_start = len(dr.metadata["fields"])
-
+    for dr in big_dataset.data_rows():
         metadata.append(
             DataRowMetadata(data_row_id=dr.uid,
                             fields=[
@@ -153,27 +147,27 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
     for dr in big_dataset.data_rows():
-        assert len(dr.metadata["fields"]) == n_fields_start
-        break
+        # 1 remaining because only the embeddings id overlaps
+        assert len(mdo.bulk_export([dr.uid])[0].fields) == 1
 
 
-@pytest.mark.skip
 def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
     """test bulk deletes for non non fields"""
-    n_fields = len(datarow.metadata["fields"])
+    n_fields = len(mdo.bulk_export([datarow.uid])[0].fields)
     metadata = make_metadata(datarow.uid)
     metadata.fields = [
        m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
     ]
     mdo.bulk_upsert([metadata])
-    assert len(datarow.metadata["fields"]) == len(
+
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == len(
         set([x.schema_id for x in metadata.fields] +
-            [x['schema_id'] for x in datarow.metadata["fields"]]))
+            [x.schema_id for x in mdo.bulk_export([datarow.uid])[0].fields]))
 
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
-    assert len(datarow.metadata["fields"]) == n_fields
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == n_fields
 
 
 def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -209,45 +203,44 @@ def test_delete_non_existent_schema_id(datarow, mdo):
 
 
 @pytest.mark.slow
-@pytest.mark.skip("Test is inconsistent.")
 def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
     deletes = []
     n_fields_start = 0
-    for idx, dr in enumerate(big_dataset.export_data_rows()):
+    for idx, dr in enumerate(big_dataset.data_rows()):
         if idx == 0:
-            n_fields_start = len(dr.metadata["fields"])
+            n_fields_start = len(mdo.bulk_export([dr.uid])[0].fields)
         deletes.append(
             DeleteDataRowMetadata(data_row_id=dr.uid,
                                   fields=[EMBEDDING_SCHEMA_ID]))
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
 
     for dr in big_dataset.export_data_rows():
-        assert len(dr.metadata["fields"]) == n_fields_start
+        assert len(mdo.bulk_export([dr.uid])[0].fields) == n_fields_start
         break
 
 
 def test_parse_raw_metadata(mdo):
     example = {
-        'data_row_id':
+        'dataRowId':
             'ckr6kkfx801ui0yrtg9fje8xh',
         'fields': [{
-            'schema_id': 'cko8s9r5v0001h2dk9elqdidh',
+            'schemaId': 'cko8s9r5v0001h2dk9elqdidh',
            'value': 'my-new-message'
         }, {
-            'schema_id': 'cko8sbczn0002h2dkdaxb5kal',
+            'schemaId': 'cko8sbczn0002h2dkdaxb5kal',
            'value': {}
         }, {
-            'schema_id': 'cko8sbscr0003h2dk04w86hof',
+            'schemaId': 'cko8sbscr0003h2dk04w86hof',
            'value': {}
         }, {
-            'schema_id': 'cko8sdzv70006h2dk8jg64zvb',
+            'schemaId': 'cko8sdzv70006h2dk8jg64zvb',
            'value': '2021-07-20T21:41:14.606710Z'
         }]
     }
 
     parsed = mdo.parse_metadata([example])
     assert len(parsed) == 1
     row = parsed[0]
-    assert row.data_row_id == example["data_row_id"]
+    assert row.data_row_id == example["dataRowId"]
     assert len(row.fields) == 3