Skip to content

Commit fc8c621

Browse files
author
Matt Sokoloff
committed
bulk export queued data rows
1 parent ceb664f commit fc8c621

File tree

3 files changed

+75
-23
lines changed

3 files changed

+75
-23
lines changed

labelbox/schema/project.py

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,35 @@ def labels(self, datasets=None, order_by=None):
160160
return PaginatedCollection(self.client, query_str, {id_param: self.uid},
161161
["project", "labels"], Label)
162162

163+
def export_queued_data_rows(self, timeout_seconds=120):
    """ Returns all data rows that are currently enqueued for this project.

    Repeatedly asks the server for the export until it reports the status
    "COMPLETE" or until `timeout_seconds` of wall-clock time have passed.
    The export is always requested at least once, even when
    `timeout_seconds` is zero.

    Args:
        timeout_seconds (float): Max waiting time, in seconds.
    Returns:
        URL of the data file with this DataRow information. If the server
        didn't generate the file during the `timeout_seconds` period,
        None is returned.
    """
    id_param = "projectId"
    query_str = """mutation GetQueuedDataRowsExportUrlPyApi($%s: ID!)
        {exportQueuedDataRows(data:{projectId: $%s }) {downloadUrl createdAt status} }
    """ % (id_param, id_param)
    sleep_time = 2
    # Track a real deadline so that time spent inside the request itself
    # counts against the timeout, not only the time spent sleeping.
    deadline = time.monotonic() + timeout_seconds
    while True:
        res = self.client.execute(query_str, {id_param: self.uid})
        res = res["exportQueuedDataRows"]
        if res["status"] == "COMPLETE":
            return res["downloadUrl"]

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return None

        logger.debug(
            "Project '%s' queued data row export, waiting for server...",
            self.uid)
        # Never sleep past the deadline; this also guarantees one last poll
        # right at the deadline for timeouts shorter than `sleep_time`.
        time.sleep(min(sleep_time, remaining))
191+
163192
def export_labels(self, timeout_seconds=60):
164193
""" Calls the server-side Label exporting that generates a JSON
165194
payload, and returns the URL to that payload.
@@ -193,13 +222,13 @@ def export_labels(self, timeout_seconds=60):
193222
time.sleep(sleep_time)
194223

195224
def export_issues(self, status=None):
196-
""" Calls the server-side Issues exporting that
225+
""" Calls the server-side Issues exporting that
197226
returns the URL to that payload.
198227
199228
Args:
200229
status (string): valid values: Open, Resolved
201230
Returns:
202-
URL of the data file with this Project's issues.
231+
URL of the data file with this Project's issues.
203232
"""
204233
id_param = "projectId"
205234
status_param = "status"
@@ -229,14 +258,14 @@ def export_issues(self, status=None):
229258
def upsert_instructions(self, instructions_file: str):
230259
"""
231260
* Uploads instructions to the UI. Running more than once will replace the instructions
232-
261+
233262
Args:
234263
instructions_file (str): Path to a local file.
235264
* Must be either a pdf, text, or html file.
236265
237266
Raises:
238267
ValueError:
239-
* project must be setup
268+
* project must be setup
240269
* instructions file must end with one of ".text", ".txt", ".pdf", ".html"
241270
"""
242271

@@ -267,18 +296,18 @@ def upsert_instructions(self, instructions_file: str):
267296

268297
self.client.execute(
269298
"""mutation UpdateFrontendWithExistingOptionsPyApi (
270-
$frontendId: ID!,
271-
$optionsId: ID!,
272-
$name: String!,
273-
$description: String!,
299+
$frontendId: ID!,
300+
$optionsId: ID!,
301+
$name: String!,
302+
$description: String!,
274303
$customizationOptions: String!
275304
) {
276305
updateLabelingFrontend(
277-
where: {id: $frontendId},
306+
where: {id: $frontendId},
278307
data: {name: $name, description: $description}
279308
) {id}
280309
updateLabelingFrontendOptions(
281-
where: {id: $optionsId},
310+
where: {id: $optionsId},
282311
data: {customizationOptions: $customizationOptions}
283312
) {id}
284313
}""", {
@@ -390,10 +419,10 @@ def validate_labeling_parameter_overrides(self, data):
390419

391420
def set_labeling_parameter_overrides(self, data):
392421
""" Adds labeling parameter overrides to this project.
393-
422+
394423
See information on priority here:
395424
https://docs.labelbox.com/en/configure-editor/queue-system#reservation-system
396-
425+
397426
>>> project.set_labeling_parameter_overrides([
398427
>>> (data_row_1, 2, 3), (data_row_2, 1, 4)])
399428
@@ -407,11 +436,11 @@ def set_labeling_parameter_overrides(self, data):
407436
- Minimum priority is 1.
408437
* Priority is not the queue position.
409438
- The position is determined by the relative priority.
410-
- E.g. [(data_row_1, 5,1), (data_row_2, 2,1), (data_row_3, 10,1)]
439+
- E.g. [(data_row_1, 5,1), (data_row_2, 2,1), (data_row_3, 10,1)]
411440
will be assigned in the following order: [data_row_2, data_row_1, data_row_3]
412441
* Datarows with parameter overrides will appear before datarows without overrides.
413442
* The priority only affects items in the queue.
414-
- Assigning a priority will not automatically add the item back into the queue.
443+
- Assigning a priority will not automatically add the item back into the queue.
415444
Number of labels:
416445
* The number of times a data row should be labeled.
417446
- Creates duplicate data rows in a project (one for each number of labels).
@@ -458,7 +487,7 @@ def unset_labeling_parameter_overrides(self, data_rows):
458487
def upsert_review_queue(self, quota_factor):
459488
""" Sets the proportion of total assets in a project to review.
460489
461-
More information can be found here:
490+
More information can be found here:
462491
https://docs.labelbox.com/en/quality-assurance/review-labels#configure-review-percentage
463492
464493
Args:

tests/integration/conftest.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from collections import namedtuple
22
from enum import Enum
33
from datetime import datetime
4-
from labelbox.orm.db_object import experimental
54
from random import randint
65
from string import ascii_letters
76
from types import SimpleNamespace
@@ -14,6 +13,7 @@
1413
from labelbox.schema.invite import Invite
1514
from labelbox.pagination import PaginatedCollection
1615
from labelbox.schema.user import User
16+
from labelbox import LabelingFrontend
1717
from labelbox import Client
1818

1919
IMG_URL = "https://picsum.photos/200/300"
@@ -83,7 +83,7 @@ def get_invites(client):
8383
Do not use. Only for testing.
8484
"""
8585
query_str = """query GetOrgInvitationsPyApi($from: ID, $first: PageSize) {
86-
organization { id invites(from: $from, first: $first) {
86+
organization { id invites(from: $from, first: $first) {
8787
nodes { id createdAt organizationRoleName inviteeEmail } nextCursor }}}"""
8888
invites = PaginatedCollection(
8989
client,
@@ -199,13 +199,13 @@ def organization(client):
199199
def project_based_user(client, rand_gen):
200200
email = rand_gen(str)
201201
# Use old mutation because it doesn't require users to accept email invites
202-
query_str = """mutation MakeNewUserPyApi {
203-
addMembersToOrganization(
204-
data: {
205-
emails: ["%s@labelbox.com"],
202+
query_str = """mutation MakeNewUserPyApi {
203+
addMembersToOrganization(
204+
data: {
205+
emails: ["%s@labelbox.com"],
206206
orgRoleId: "%s",
207207
projectRoles: []
208-
}
208+
}
209209
) {
210210
newUserId
211211
}
@@ -227,3 +227,17 @@ def project_pack(client):
227227
yield projects
228228
for proj in projects:
229229
proj.delete()
230+
231+
232+
@pytest.fixture
def configured_project(project, client, rand_gen):
    """ Yields `project` after giving it something to export.

    A new dataset with a single data row (`IMG_URL`) is attached to the
    project, and the project is set up with the "editor" labeling frontend
    and an empty ontology. After the test, the dataset and the project are
    deleted.
    """
    dataset = client.create_dataset(name=rand_gen(str), projects=project)
    try:
        dataset.create_data_row(row_data=IMG_URL)
        editor = list(
            project.client.get_labeling_frontends(
                where=LabelingFrontend.name == "editor"))[0]
        empty_ontology = {"tools": [], "classifications": []}
        project.setup(editor, empty_ontology)
    except Exception:
        # Setup failed before the test could run: the teardown below is
        # never reached in that case, so remove the dataset here instead
        # of leaking it.
        dataset.delete()
        raise
    yield project
    dataset.delete()
    project.delete()

tests/integration/test_project.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
import pytest
21
import json
2+
33
import requests
4+
import ndjson
5+
import pytest
46

57
from labelbox import Project, LabelingFrontend
68
from labelbox.exceptions import InvalidQueryError
@@ -100,3 +102,10 @@ def test_attach_instructions(client, project):
100102
with pytest.raises(ValueError) as exc_info:
101103
project.upsert_instructions('/tmp/file.invalid_file_extension')
102104
assert "instructions_file must end with one of" in str(exc_info.value)
105+
106+
107+
def test_queued_data_row_export(configured_project):
    """ The queued data row export of a configured project is downloadable
    and contains exactly the one data row that the fixture created.
    """
    url = configured_project.export_queued_data_rows()
    # `export_queued_data_rows` returns None when the server did not finish
    # the export within its timeout.
    assert url, "queued data row export did not complete in time"
    # Bound the download and surface HTTP errors directly, rather than
    # letting an error page reach the ndjson parser.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    result = ndjson.loads(response.text)
    assert len(result) == 1

0 commit comments

Comments
 (0)