
Commit 55f4d7b

Merge pull request #264 from Labelbox/develop
3.2.0
2 parents 685b23e + c4b6b46

File tree

15 files changed: +290 −135 lines changed


CHANGELOG.md

Lines changed: 11 additions & 0 deletions
@@ -1,5 +1,16 @@
 # Changelog
 
+# Version 3.2.0 (2021-08-26)
+## Added
+* List `BulkImportRequest`s for a project with `Project.bulk_import_requests()`
+* Improvements to `Dataset.create_data_rows()`
+    * Add attachments when bulk importing data rows
+    * Provide external ids when creating data rows from local files
+    * Get more informative error messages when the api rejects an import
+
+## Fix
+* Bug causing `project.label_generator()` to fail when projects had benchmarks
+
 # Version 3.1.0 (2021-08-18)
 ## Added
 * Support for new HTML attachment type
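
A minimal sketch of the 3.2.0 additions in use. This is illustrative, not from the commit: the API key, uids, file path, and external id below are placeholders, and it assumes `TEXT` is among the supported attachment types.

```python
from labelbox import Client

client = Client(api_key="<LABELBOX_API_KEY>")   # placeholder key
project = client.get_project("<project_uid>")   # placeholder uid
dataset = client.get_dataset("<dataset_uid>")   # placeholder uid

# New: list a project's bulk import requests, oldest first.
for bulk_import_request in project.bulk_import_requests():
    print(bulk_import_request.name, bulk_import_request.state)

# New: local files can carry an external id and attachments.
task = dataset.create_data_rows([{
    "row_data": "/path/to/file1.jpg",   # local path, uploaded automatically
    "external_id": "img-1",             # previously unsupported for local files
    "attachments": [{"type": "TEXT", "value": "a note shown with the asset"}],
}])
task.wait_till_done()
```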

labelbox/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 name = "labelbox"
-__version__ = "3.1.0"
+__version__ = "3.2.0"
 
 from labelbox.schema.project import Project
 from labelbox.client import Client

labelbox/data/serialization/labelbox_v1/label.py

Lines changed: 1 addition & 1 deletion
@@ -131,7 +131,7 @@ class LBV1Label(BaseModel):
     seconds_to_label: Optional[float] = Extra('Seconds to Label')
     agreement: Optional[float] = Extra('Agreement')
     benchmark_agreement: Optional[float] = Extra('Benchmark Agreement')
-    benchmark_id: Optional[float] = Extra('Benchmark ID')
+    benchmark_id: Optional[str] = Extra('Benchmark ID')
     dataset_name: Optional[str] = Extra('Dataset Name')
     reviews: Optional[List[Review]] = Extra('Reviews')
     label_url: Optional[str] = Extra('View Label')
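
This one-character type change is the changelog's benchmark fix: benchmark ids are string ids, so pydantic's `float` coercion raised whenever a label carried a `Benchmark ID`, which broke `project.label_generator()` on projects with benchmarks. A minimal reproduction of the failure mode, using pydantic directly with hypothetical model names:

```python
from typing import Optional
from pydantic import BaseModel, ValidationError

class Before(BaseModel):          # the 3.1.0 annotation
    benchmark_id: Optional[float] = None

class After(BaseModel):           # the 3.2.0 annotation
    benchmark_id: Optional[str] = None

try:
    Before(benchmark_id="ckabc123")   # string id cannot coerce to float
except ValidationError as err:
    print("old type fails:", err)

print(After(benchmark_id="ckabc123").benchmark_id)  # parses cleanly
```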

labelbox/schema/asset_attachment.py

Lines changed: 18 additions & 0 deletions
@@ -1,4 +1,5 @@
 from enum import Enum
+from typing import Dict
 
 from labelbox.orm.db_object import DbObject
 from labelbox.orm.model import Field
@@ -24,3 +25,20 @@ class AttachmentType(Enum):
 
     attachment_type = Field.String("attachment_type", "type")
     attachment_value = Field.String("attachment_value", "value")
+
+    @classmethod
+    def validate_attachment_json(cls, attachment_json: Dict[str, str]) -> None:
+        for required_key in ['type', 'value']:
+            if required_key not in attachment_json:
+                raise ValueError(
+                    f"Must provide a `{required_key}` key for each attachment. Found {attachment_json}."
+                )
+        cls.validate_attachment_type(attachment_json['type'])
+
+    @classmethod
+    def validate_attachment_type(cls, attachment_type: str) -> None:
+        valid_types = set(cls.AttachmentType.__members__)
+        if attachment_type not in valid_types:
+            raise ValueError(
+                f"meta_type must be one of {valid_types}. Found {attachment_type}"
+            )
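
These class methods centralize checks that `DataRow.create_attachment` previously inlined (see the next file). A quick sketch of their behavior; the payloads are illustrative and assume `TEXT` is a member of `AttachmentType`:

```python
from labelbox.schema.asset_attachment import AssetAttachment

# Well-formed payloads pass silently.
AssetAttachment.validate_attachment_json({"type": "TEXT", "value": "hello"})

# A missing required key raises ValueError.
try:
    AssetAttachment.validate_attachment_json({"type": "TEXT"})
except ValueError as err:
    print(err)  # Must provide a `value` key for each attachment. ...

# An unknown type name also raises ValueError.
try:
    AssetAttachment.validate_attachment_type("GIF")
except ValueError as err:
    print(err)  # meta_type must be one of {...}. Found GIF
```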

labelbox/schema/data_row.py

Lines changed: 3 additions & 9 deletions
@@ -42,10 +42,8 @@ class DataRow(DbObject, Updateable, BulkDeletable):
     labels = Relationship.ToMany("Label", True)
     attachments = Relationship.ToMany("AssetAttachment", False, "attachments")
 
-    supported_meta_types = supported_attachment_types = {
-        attachment_type.value
-        for attachment_type in AssetAttachment.AttachmentType
-    }
+    supported_meta_types = supported_attachment_types = set(
+        AssetAttachment.AttachmentType.__members__)
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -103,11 +101,7 @@ def create_attachment(self, attachment_type, attachment_value):
         Raises:
             ValueError: asset_type must be one of the supported types.
         """
-
-        if attachment_type not in self.supported_attachment_types:
-            raise ValueError(
-                f"meta_type must be one of {self.supported_attachment_types}. Found {attachment_type}"
-            )
+        AssetAttachment.validate_attachment_type(attachment_type)
 
         attachment_type_param = "type"
         attachment_value_param = "value"
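
`create_attachment` now delegates its type check to `AssetAttachment.validate_attachment_type`, so caller-facing behavior is unchanged. A short sketch, assuming the `dataset` from the changelog example already holds at least one data row:

```python
# Grab any data row from the dataset's paginated collection.
data_row = next(iter(dataset.data_rows()))

# Valid: attaches a text note to the data row.
data_row.create_attachment("TEXT", "labeled during Q3 audit")

# Unsupported type names still raise ValueError, now from AssetAttachment.
data_row.create_attachment("NOT_A_TYPE", "...")  # raises ValueError
```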

labelbox/schema/dataset.py

Lines changed: 79 additions & 33 deletions
@@ -1,3 +1,4 @@
+from labelbox import utils
 import os
 import json
 import logging
@@ -81,13 +82,17 @@ def create_data_rows(self, items):
             is uploaded to Labelbox and a DataRow referencing it is created.
 
         If an item is a `dict`, then it could support one of the two following structures
-            1. For static imagery, video, and text it should map `DataRow` fields (or their names) to values.
-               At the minimum an `item` passed as a `dict` must contain a `DataRow.row_data` key and value.
+            1. For static imagery, video, and text it should map `DataRow` field names to values.
+               At the minimum an `item` passed as a `dict` must contain a `row_data` key and value.
+               If the value for row_data is a local file path and the path exists,
+               then the local file will be uploaded to labelbox.
+
             2. For tiled imagery the dict must match the import structure specified in the link below
                https://docs.labelbox.com/data-model/en/index-en#tiled-imagery-import
 
         >>> dataset.create_data_rows([
         >>>     {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"},
+        >>>     {DataRow.row_data:"/path/to/file1.jpg"},
         >>>     "path/to/file2.jpg",
         >>>     {"tileLayerUrl" : "http://", ...}
         >>>     ])
@@ -115,64 +120,105 @@ def create_data_rows(self, items):
         DataRow = Entity.DataRow
 
         def upload_if_necessary(item):
-            if isinstance(item, str):
-                item_url = self.client.upload_file(item)
-                # Convert item from str into a dict so it gets processed
-                # like all other dicts.
-                item = {DataRow.row_data: item_url, DataRow.external_id: item}
+            row_data = item['row_data']
+            if os.path.exists(row_data):
+                item_url = self.client.upload_file(item['row_data'])
+                item = {
+                    "row_data": item_url,
+                    "external_id": item.get('external_id', item['row_data']),
+                    "attachments": item.get('attachments', [])
+                }
             return item
 
-        with ThreadPoolExecutor(file_upload_thread_count) as executor:
-            futures = [
-                executor.submit(upload_if_necessary, item) for item in items
-            ]
-            items = [future.result() for future in as_completed(futures)]
-
-        def convert_item(item):
-            # Don't make any changes to tms data
-            if "tileLayerUrl" in item:
-                return item
-            # Convert string names to fields.
-            item = {
-                key if isinstance(key, Field) else DataRow.field(key): value
-                for key, value in item.items()
-            }
+        def validate_attachments(item):
+            attachments = item.get('attachments')
+            if attachments:
+                if isinstance(attachments, list):
+                    for attachment in attachments:
+                        Entity.AssetAttachment.validate_attachment_json(
+                            attachment)
+                else:
+                    raise ValueError(
+                        f"Attachments must be a list. Found {type(attachments)}"
+                    )
+            return attachments
+
+        def format_row(item):
+            # Formats user input into a consistent dict structure
+            if isinstance(item, dict):
+                # Convert fields to strings
+                item = {
+                    key.name if isinstance(key, Field) else key: value
+                    for key, value in item.items()
+                }
+            elif isinstance(item, str):
+                # The main advantage of using a string over a dict is that the user is specifying
+                # that the file should exist locally.
+                # That info is lost after this section so we should check for it here.
+                if not os.path.exists(item):
+                    raise ValueError(f"Filepath {item} does not exist.")
+                item = {"row_data": item, "external_id": item}
+            return item
 
-            if DataRow.row_data not in item:
+        def validate_keys(item):
+            if 'row_data' not in item:
                 raise InvalidQueryError(
-                    "DataRow.row_data missing when creating DataRow.")
+                    "`row_data` missing when creating DataRow.")
 
-            invalid_keys = set(item) - set(DataRow.fields())
+            invalid_keys = set(item) - {
+                *{f.name for f in DataRow.fields()}, 'attachments'
+            }
             if invalid_keys:
                 raise InvalidAttributeError(DataRow, invalid_keys)
+            return item
+
+        def convert_item(item):
+            # Don't make any changes to tms data
+            if "tileLayerUrl" in item:
+                validate_attachments(item)
+                return item
+            # Convert all payload variations into the same dict format
+            item = format_row(item)
+            # Make sure required keys exist (and there are no extra keys)
+            validate_keys(item)
+            # Make sure attachments are valid
+            validate_attachments(item)
+            # Upload any local file paths
+            item = upload_if_necessary(item)
 
-            # Item is valid, convert it to a dict {graphql_field_name: value}
-            # Need to change the name of DataRow.row_data to "data"
             return {
-                "data" if key == DataRow.row_data else key.graphql_name: value
+                "data" if key == "row_data" else utils.camel_case(key): value
                 for key, value in item.items()
             }
 
+        if not isinstance(items, list):
+            raise ValueError(
+                f"Must pass a list to create_data_rows. Found {type(items)}")
+
+        with ThreadPoolExecutor(file_upload_thread_count) as executor:
+            futures = [executor.submit(convert_item, item) for item in items]
+            items = [future.result() for future in as_completed(futures)]
+
         # Prepare and upload the descriptor file
-        items = [convert_item(item) for item in items]
         data = json.dumps(items)
         descriptor_url = self.client.upload_data(data)
-
         # Create data source
         dataset_param = "datasetId"
         url_param = "jsonUrl"
         query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){
             appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s}
-            ){ taskId accepted } } """ % (dataset_param, url_param,
-                                          dataset_param, url_param)
+            ){ taskId accepted errorMessage } } """ % (dataset_param, url_param,
+                                                       dataset_param, url_param)
+
         res = self.client.execute(query_str, {
             dataset_param: self.uid,
             url_param: descriptor_url
         })
         res = res["appendRowsToDataset"]
         if not res["accepted"]:
+            msg = res['errorMessage']
             raise InvalidQueryError(
-                "Server did not accept DataRow creation request")
+                f"Server did not accept DataRow creation request. {msg}")
 
         # Fetch and return the task.
         task_id = res["taskId"]
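
The refactor splits the old `convert_item` into `format_row`, `validate_keys`, `validate_attachments`, and `upload_if_necessary`, and runs the whole pipeline in the thread pool instead of only the uploads. A sketch of the payload shapes it now accepts; paths and URLs are placeholders:

```python
task = dataset.create_data_rows([
    # Plain string: must be an existing local path; it doubles as external_id.
    "/path/to/file2.jpg",
    # Dict with a hosted URL: passed through unchanged.
    {"row_data": "http://my_site.com/photos/img_01.jpg"},
    # Dict with a local path: uploaded, keeping external_id and attachments.
    {
        "row_data": "/path/to/file1.jpg",
        "external_id": "img-1",
        "attachments": [{"type": "TEXT", "value": "note"}],
    },
])
task.wait_till_done()

# Non-list payloads are now rejected up front:
# dataset.create_data_rows("single_item.jpg")  # ValueError: Must pass a list ...
```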

labelbox/schema/project.py

Lines changed: 20 additions & 0 deletions
@@ -582,6 +582,26 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool:
         return res["project"]["showPredictionsToLabelers"][
             "showingPredictionsToLabelers"]
 
+    def bulk_import_requests(self):
+        """ Returns bulk import request objects which are used in model-assisted labeling.
+        These are returned with the oldest first, and most recent last.
+        """
+
+        id_param = "project_id"
+        query_str = """query ListAllImportRequestsPyApi($%s: ID!) {
+            bulkImportRequests (
+                where: { projectId: $%s }
+                skip: %%d
+                first: %%d
+            ) {
+                %s
+            }
+        }""" % (id_param, id_param,
+                query.results_query_part(Entity.BulkImportRequest))
+        return PaginatedCollection(self.client, query_str,
+                                   {id_param: str(self.uid)},
+                                   ["bulkImportRequests"], BulkImportRequest)
+
     def upload_annotations(
             self,
             name: str,
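
`bulk_import_requests` returns a `PaginatedCollection`, so pages are fetched lazily as you iterate; the `skip`/`first` placeholders in the query are filled in by the pagination machinery. A consumption sketch, reusing the `project` from the changelog example:

```python
# Iterate lazily; each element is a BulkImportRequest, oldest first.
for bulk_import_request in project.bulk_import_requests():
    print(bulk_import_request.name, bulk_import_request.state)

# Or materialize the full history at once.
history = list(project.bulk_import_requests())
print(f"{len(history)} import requests so far")
```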

tests/integration/bulk_import/conftest.py

Lines changed: 2 additions & 4 deletions
@@ -6,8 +6,6 @@
 from labelbox.schema.labeling_frontend import LabelingFrontend
 from labelbox.schema.annotation_import import MALPredictionImport
 
-IMG_URL = "https://picsum.photos/200/300"
-
 
 @pytest.fixture
 def ontology():
@@ -103,7 +101,7 @@ def ontology():
 
 
 @pytest.fixture
-def configured_project(client, ontology, rand_gen):
+def configured_project(client, ontology, rand_gen, image_url):
     project = client.create_project(name=rand_gen(str))
     dataset = client.create_dataset(name=rand_gen(str))
     editor = list(
@@ -112,7 +110,7 @@ def configured_project(client, ontology, rand_gen):
     project.setup(editor, ontology)
     data_row_ids = []
     for _ in range(len(ontology['tools']) + len(ontology['classifications'])):
-        data_row_ids.append(dataset.create_data_row(row_data=IMG_URL).uid)
+        data_row_ids.append(dataset.create_data_row(row_data=image_url).uid)
     project.datasets.connect(dataset)
     project.data_row_ids = data_row_ids
     yield project

tests/integration/bulk_import/test_bulk_import_request.py

Lines changed: 27 additions & 10 deletions
@@ -149,21 +149,38 @@ def assert_file_content(url: str, predictions):
     assert response.text == ndjson.dumps(predictions)
 
 
-def test_delete(client, configured_project, predictions):
+def test_project_bulk_import_requests(client, configured_project, predictions):
+    result = configured_project.bulk_import_requests()
+    assert len(list(result)) == 0
+
+    name = str(uuid.uuid4())
+    bulk_import_request = configured_project.upload_annotations(
+        name=name, annotations=predictions)
+    bulk_import_request.wait_until_done()
 
-    id_param = "project_id"
-    query_str = """query bulk_import_requestsPyApi($%s: ID!) {bulkImportRequests(where: {projectId: $%s}) {id}}""" % (
-        id_param, id_param)
+    name = str(uuid.uuid4())
+    bulk_import_request = configured_project.upload_annotations(
+        name=name, annotations=predictions)
+    bulk_import_request.wait_until_done()
+
+    name = str(uuid.uuid4())
+    bulk_import_request = configured_project.upload_annotations(
+        name=name, annotations=predictions)
+    bulk_import_request.wait_until_done()
+
+    result = configured_project.bulk_import_requests()
+    assert len(list(result)) == 3
+
+
+def test_delete(client, configured_project, predictions):
     name = str(uuid.uuid4())
 
     bulk_import_request = configured_project.upload_annotations(
         name=name, annotations=predictions)
     bulk_import_request.wait_until_done()
-    all_import_requests = client.execute(query_str,
-                                         {id_param: configured_project.uid})
-    assert len(all_import_requests['bulkImportRequests']) == 1
+    all_import_requests = configured_project.bulk_import_requests()
+    assert len(list(all_import_requests)) == 1
 
     bulk_import_request.delete()
-    all_import_requests = client.execute(query_str,
-                                         {id_param: configured_project.uid})
-    assert len(all_import_requests['bulkImportRequests']) == 0
+    all_import_requests = configured_project.bulk_import_requests()
+    assert len(list(all_import_requests)) == 0
