Skip to content

Commit 825c456

Browse files
authored
Merge pull request #295 from Labelbox/ms/bulk-metadata-query
bulk metadata export
2 parents 57f60bd + 1046699 commit 825c456

File tree

6 files changed

+125
-113
lines changed

6 files changed

+125
-113
lines changed

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,18 @@
11
# Changelog
2+
# Version 3.6.0 (2021-04-10)
3+
## Added
4+
* Bulk export metadata with `DataRowMetadataOntology.bulk_export()`
5+
* Add docstring examples of annotation types and a few helper methods
6+
7+
## Updated
8+
* Update metadata notebook under examples/basics to include bulk_export.
9+
* Allow color to be a single integer when constructing Mask objects
10+
* Allow users to pass int arrays to RasterData and attempt coercion to uint8
11+
12+
## Removed
13+
* data_row.metadata was removed in favor of bulk exports.
14+
15+
216
# Version 3.5.0 (2021-09-15)
317
## Added
418
* Diagnostics custom metrics

examples/basics/data_row_metadata.ipynb

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,10 @@
5555
},
5656
"outputs": [],
5757
"source": [
58-
"%%capture\n",
59-
"!pip install --upgrade tensorflow-hub scikit-learn\n",
60-
"!pip install --upgrade \"labelbox[data]\""
58+
"!pip install -q --upgrade tensorflow-hub \\\n",
59+
" scikit-learn \\\n",
60+
" seaborn \\\n",
61+
" \"labelbox[data]\""
6162
]
6263
},
6364
{
@@ -90,12 +91,13 @@
9091
")\n",
9192
"from sklearn.random_projection import GaussianRandomProjection\n",
9293
"import seaborn as sns\n",
93-
"import datetime\n",
94+
"from datetime import datetime\n",
9495
"from pprint import pprint\n",
9596
"import tensorflow_hub as hub\n",
9697
"from tqdm.notebook import tqdm\n",
9798
"import requests\n",
98-
"import tensorflow as tf"
99+
"import tensorflow as tf\n",
100+
"from pprint import pprint"
99101
]
100102
},
101103
{
@@ -361,7 +363,7 @@
361363
" split = \"cko8sbscr0003h2dk04w86hof\"\n",
362364
" \n",
363365
" embeddings.append(list(model(processor(response.content), training=False)[0].numpy()))\n",
364-
" dt = datetime.datetime.utcnow() \n",
366+
" dt = datetime.utcnow() \n",
365367
" message =\"my-new-message\"\n",
366368
"\n",
367369
" uploads.append(\n",
@@ -525,7 +527,7 @@
525527
},
526528
"outputs": [],
527529
"source": [
528-
"metadata = mdo.parse_metadata([datarow.metadata])"
530+
"metadata = mdo.bulk_export([datarow.uid])[0]"
529531
]
530532
},
531533
{
@@ -584,7 +586,7 @@
584586
},
585587
"outputs": [],
586588
"source": [
587-
"len(client.get_data_row(deletes.data_row_id).metadata[\"fields\"])"
589+
"len(mdo.bulk_export(deletes.data_row_id)[0].fields)"
588590
]
589591
},
590592
{
@@ -608,7 +610,7 @@
608610
},
609611
"outputs": [],
610612
"source": [
611-
"len(client.get_data_row(deletes.data_row_id).metadata[\"fields\"])"
613+
"len(mdo.bulk_export(deletes.data_row_id)[0].fields)"
612614
]
613615
},
614616
{

labelbox/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name = "labelbox"
2-
__version__ = "3.5.0"
2+
__version__ = "3.6.0"
33

44
from labelbox.schema.project import Project
55
from labelbox.client import Client

labelbox/schema/data_row.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,12 @@ class DataRow(DbObject, Updateable, BulkDeletable):
2020
updated_at (datetime)
2121
created_at (datetime)
2222
media_attributes (dict): generated media attributes for the datarow
23-
metadata (dict): uploaded metadata
2423
2524
dataset (Relationship): `ToOne` relationship to Dataset
2625
created_by (Relationship): `ToOne` relationship to User
2726
organization (Relationship): `ToOne` relationship to Organization
2827
labels (Relationship): `ToMany` relationship to Label
2928
attachments (Relationship) `ToMany` relationship with AssetAttachment
30-
metadata (Relationship): This Relationship is Deprecated. Please use `DataRow.attachments()` instead
3129
"""
3230
external_id = Field.String("external_id")
3331
row_data = Field.String("row_data")
@@ -50,33 +48,6 @@ def __init__(self, *args, **kwargs):
5048
self.attachments.supports_filtering = False
5149
self.attachments.supports_sorting = False
5250

53-
@property
54-
def metadata(self) -> Dict[str, Union[str, List[Dict]]]:
55-
"""Get metadata for datarow
56-
"""
57-
58-
query = """query GetDataRowMetadataBetaPyApi($dataRowID: ID!) {
59-
dataRow(where: {id: $dataRowID}) {
60-
customMetadata {
61-
value
62-
schemaId
63-
}
64-
}
65-
}
66-
"""
67-
68-
metadata = self.client.execute(
69-
query, {"dataRowID": self.uid})["dataRow"]["customMetadata"]
70-
71-
return {
72-
"data_row_id":
73-
self.uid,
74-
"fields": [{
75-
"schema_id": m["schemaId"],
76-
"value": m["value"]
77-
} for m in metadata]
78-
}
79-
8051
@staticmethod
8152
def bulk_delete(data_rows):
8253
""" Deletes all the given DataRows.

labelbox/schema/data_row_metadata.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class DeleteDataRowMetadata(_CamelCaseMixin):
6464

6565
class DataRowMetadataBatchResponse(_CamelCaseMixin):
6666
data_row_id: str
67-
error: str
67+
error: Optional[str] = None
6868
fields: List[Union[DataRowMetadataField, SchemaId]]
6969

7070

@@ -200,7 +200,7 @@ def parse_metadata(
200200
for dr in unparsed:
201201
fields = []
202202
for f in dr["fields"]:
203-
schema = self.all_fields_id_index[f["schema_id"]]
203+
schema = self.all_fields_id_index[f["schemaId"]]
204204
if schema.kind == DataRowMetadataKind.enum:
205205
continue
206206
elif schema.kind == DataRowMetadataKind.option:
@@ -212,7 +212,7 @@ def parse_metadata(
212212

213213
fields.append(field)
214214
parsed.append(
215-
DataRowMetadata(data_row_id=dr["data_row_id"], fields=fields))
215+
DataRowMetadata(data_row_id=dr["dataRowId"], fields=fields))
216216
return parsed
217217

218218
def bulk_upsert(
@@ -330,6 +330,44 @@ def _batch_delete(
330330
items,
331331
batch_size=self._batch_size)
332332

333+
def bulk_export(self, data_row_ids: List[str]) -> List[DataRowMetadata]:
334+
""" Exports metadata for a list of data rows
335+
336+
>>> mdo.bulk_export([data_row.uid for data_row in data_rows])
337+
338+
Args:
339+
data_row_ids: List of data row ids to fetch metadata for
340+
Returns:
341+
A list of DataRowMetadata.
342+
There will be one DataRowMetadata for each data_row_id passed in.
343+
This is true even if the data row does not have any metadata.
344+
Data rows without metadata will have empty `fields`.
345+
346+
"""
347+
348+
if not len(data_row_ids):
349+
raise ValueError("Empty list passed")
350+
351+
def _bulk_export(_data_row_ids: List[str]) -> List[DataRowMetadata]:
352+
query = """query dataRowCustomMetadataPyApi($dataRowIds: [ID!]!) {
353+
dataRowCustomMetadata(where: {dataRowIds : $dataRowIds}) {
354+
dataRowId
355+
fields {
356+
value
357+
schemaId
358+
}
359+
}
360+
}
361+
"""
362+
return self.parse_metadata(
363+
self.client.execute(
364+
query,
365+
{"dataRowIds": _data_row_ids})['dataRowCustomMetadata'])
366+
367+
return _batch_operations(_bulk_export,
368+
data_row_ids,
369+
batch_size=self._batch_size)
370+
333371
def _parse_upsert(
334372
self, metadatum: DataRowMetadataField
335373
) -> List[_UpsertDataRowMetadataInput]:

0 commit comments

Comments
 (0)