Skip to content

Commit 6c16ee5

Browse files
author
Matt Sokoloff
committed
added tests and cache for ndjson file fetching
1 parent df1c1ba commit 6c16ee5

File tree

3 files changed

+54
-22
lines changed

3 files changed

+54
-22
lines changed

labelbox/client.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,15 @@ def upload_file(self, path: str) -> str:
253253
def upload_data(self,
254254
content: bytes,
255255
filename: str = None,
256-
content_type: str = None) -> str:
256+
content_type: str = None,
257+
sign: bool = False) -> str:
257258
""" Uploads the given data (bytes) to Labelbox.
258259
259260
Args:
260261
content: bytestring to upload
261262
filename: name of the upload
262263
content_type: content type of data uploaded
264+
sign: whether or not to sign the url
263265
264266
Returns:
265267
str, the URL of uploaded data.
@@ -274,7 +276,7 @@ def upload_data(self,
274276
"variables": {
275277
"file": None,
276278
"contentLength": len(content),
277-
"sign": False
279+
"sign": sign
278280
},
279281
"query":
280282
"""mutation UploadFile($file: Upload!, $contentLength: Int!,

labelbox/schema/bulk_import_request.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import time
33
from uuid import UUID, uuid4
4+
import functools
45

56
import logging
67
from pathlib import Path
@@ -114,35 +115,58 @@ class BulkImportRequest(DbObject):
114115
created_by = Relationship.ToOne("User", False, "created_by")
115116

116117
@property
117-
def inputs(self):
118+
def inputs(self) -> Optional[List[Dict[str, str]]]:
118119
"""
119120
Inputs for each individual annotation uploaded.
120-
* This should match the ndjson annotations that you have uploaded.
121-
* This information will expire after 24 hours.
122-
"""
123-
return self._fetch_remote_ndjson(self.input_file_url)
121+
This should match the ndjson annotations that you have uploaded.
124122
123+
Returns:
124+
Uploaded ndjsons.
125+
126+
* This information will expire after 24 hours.
127+
"""
128+
return self._fetch_remote_ndjson(self.input_file_url)
129+
125130
@property
126-
def errors(self):
131+
def errors(self) -> Optional[List[Dict[str, str]]]:
127132
"""
128133
Errors for each individual annotation uploaded.
129-
* Returns an empty list if there are no errors and None if the update is still running.
134+
135+
Returns:
136+
Empty list if there are no errors and None if the update is still running.
137+
If there are errors and the job has completed, a list of dicts containing the error messages will be returned.
138+
130139
* This information will expire after 24 hours.
131140
"""
132-
return self._fetch_remote_ndjson(self.error_file_url)
141+
return self._fetch_remote_ndjson(self.error_file_url)
133142

134143
@property
135-
def statuses(self):
144+
def statuses(self) -> Optional[List[Dict[str, str]]]:
136145
"""
137146
Status for each individual annotation uploaded.
138-
* Returns a status for each row if the upload is done running and was successful. Otherwise it returns None.
147+
148+
Returns:
149+
A status for each annotation if the upload is done running and was successful. Otherwise it returns None.
150+
139151
* This information will expire after 24 hours.
140152
"""
141-
return self._fetch_remote_ndjson(self.status_file_url)
153+
return self._fetch_remote_ndjson(self.status_file_url)
154+
155+
@functools.lru_cache()
156+
def _fetch_remote_ndjson(
157+
self, url: Optional[str]) -> Optional[List[Dict[str, str]]]:
158+
"""
159+
Fetches the remote ndjson file and caches the results.
142160
143-
def _fetch_remote_ndjson(self, url):
161+
Args:
162+
url (str): either the input_file_url, error_file_url, status_file_url, or None
163+
urls are None when the file is unavailable.
164+
Returns:
165+
None if the url is None; otherwise the ndjson contents as a list of dicts.
166+
"""
144167
if url is not None:
145168
return ndjson.loads(requests.get(url).text)
169+
return None
146170

147171
def refresh(self) -> None:
148172
"""Synchronizes values of all fields with the database.

tests/integration/bulk_import/test_bulk_import_request.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,20 +121,26 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
121121

122122

123123
@pytest.mark.slow
124-
def test_wait_till_done(configured_project):
124+
def test_wait_till_done(rectangle_inference, configured_project):
125125
name = str(uuid.uuid4())
126-
url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
126+
url = configured_project.client.upload_data(content=ndjson.dumps(
127+
[rectangle_inference]),
128+
sign=True)
127129
bulk_import_request = configured_project.upload_annotations(name=name,
128130
annotations=url,
129131
validate=False)
130132

131133
bulk_import_request.wait_until_done()
132-
133-
assert (bulk_import_request.state == BulkImportRequestState.FINISHED or
134-
bulk_import_request.state == BulkImportRequestState.FAILED)
135-
136-
assert bulk_import_request.errors is not None
137-
assert bulk_import_request.inputs is not None
134+
assert bulk_import_request.state == BulkImportRequestState.FINISHED
135+
136+
#Check that the status files are being returned as expected
137+
assert len(bulk_import_request.errors) == 0
138+
assert len(bulk_import_request.inputs) == 1
139+
assert bulk_import_request.inputs[0]['uuid'] == rectangle_inference['uuid']
140+
assert len(bulk_import_request.statuses) == 1
141+
assert bulk_import_request.statuses[0]['status'] == 'SUCCESS'
142+
assert bulk_import_request.statuses[0]['uuid'] == rectangle_inference[
143+
'uuid']
138144

139145

140146
def assert_file_content(url: str, predictions):

0 commit comments

Comments
 (0)