Commit 4416109

Matt Sokoloff committed
Merge branch 'develop' of https://github.com/Labelbox/labelbox-python into ms/attachments
2 parents: b758170 + e5c6a58

File tree: 10 files changed (+129, -90 lines)

labelbox/data/serialization/labelbox_v1/label.py

Lines changed: 1 addition & 1 deletion
@@ -131,7 +131,7 @@ class LBV1Label(BaseModel):
     seconds_to_label: Optional[float] = Extra('Seconds to Label')
     agreement: Optional[float] = Extra('Agreement')
     benchmark_agreement: Optional[float] = Extra('Benchmark Agreement')
-    benchmark_id: Optional[float] = Extra('Benchmark ID')
+    benchmark_id: Optional[str] = Extra('Benchmark ID')
     dataset_name: Optional[str] = Extra('Dataset Name')
     reviews: Optional[List[Review]] = Extra('Reviews')
     label_url: Optional[str] = Extra('View Label')
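
This one-line fix matters because Labelbox benchmark IDs are cuid strings, which a float-typed pydantic field rejects outright. A minimal sketch of the failure mode, using stand-in models rather than the real LBV1Label:

    from typing import Optional
    from pydantic import BaseModel, ValidationError

    class BadStub(BaseModel):
        benchmark_id: Optional[float] = None  # the old, incorrect type

    class GoodStub(BaseModel):
        benchmark_id: Optional[str] = None  # the fix: IDs are opaque strings

    cuid = "ckmu2qtnenu1f0y7o87f07muf"  # illustrative Labelbox-style ID
    try:
        BadStub(benchmark_id=cuid)
    except ValidationError:
        print("float-typed field rejects a cuid string")

    print(GoodStub(benchmark_id=cuid).benchmark_id)  # round-trips unchanged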

labelbox/schema/project.py

Lines changed: 20 additions & 0 deletions
@@ -582,6 +582,26 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool:
         return res["project"]["showPredictionsToLabelers"][
             "showingPredictionsToLabelers"]
 
+    def bulk_import_requests(self):
+        """ Returns bulk import request objects which are used in model-assisted labeling.
+        These are returned with the oldest first, and most recent last.
+        """
+
+        id_param = "project_id"
+        query_str = """query ListAllImportRequestsPyApi($%s: ID!) {
+            bulkImportRequests (
+                where: { projectId: $%s }
+                skip: %%d
+                first: %%d
+            ) {
+                %s
+            }
+        }""" % (id_param, id_param,
+                query.results_query_part(Entity.BulkImportRequest))
+        return PaginatedCollection(self.client, query_str,
+                                   {id_param: str(self.uid)},
+                                   ["bulkImportRequests"], BulkImportRequest)
+
     def upload_annotations(
             self,
             name: str,
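
A usage sketch for the new method, assuming an authenticated client; the API key and project ID are placeholders, and the printed attributes (name, state) are standard BulkImportRequest fields:

    from labelbox import Client

    client = Client(api_key="<api key>")          # placeholder credentials
    project = client.get_project("<project id>")  # placeholder ID

    # A PaginatedCollection is returned: pages of BulkImportRequest objects
    # are fetched lazily as you iterate, oldest request first.
    for request in project.bulk_import_requests():
        print(request.name, request.state)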

tests/integration/bulk_import/conftest.py

Lines changed: 2 additions & 4 deletions
@@ -6,8 +6,6 @@
 from labelbox.schema.labeling_frontend import LabelingFrontend
 from labelbox.schema.annotation_import import MALPredictionImport
 
-IMG_URL = "https://picsum.photos/200/300"
-
 
 @pytest.fixture
 def ontology():
@@ -103,7 +101,7 @@ def ontology():
 
 
 @pytest.fixture
-def configured_project(client, ontology, rand_gen):
+def configured_project(client, ontology, rand_gen, image_url):
     project = client.create_project(name=rand_gen(str))
     dataset = client.create_dataset(name=rand_gen(str))
     editor = list(
@@ -112,7 +110,7 @@ def configured_project(client, ontology, rand_gen):
     project.setup(editor, ontology)
     data_row_ids = []
     for _ in range(len(ontology['tools']) + len(ontology['classifications'])):
-        data_row_ids.append(dataset.create_data_row(row_data=IMG_URL).uid)
+        data_row_ids.append(dataset.create_data_row(row_data=image_url).uid)
     project.datasets.connect(dataset)
     project.data_row_ids = data_row_ids
     yield project

tests/integration/bulk_import/test_bulk_import_request.py

Lines changed: 27 additions & 10 deletions
@@ -149,21 +149,38 @@ def assert_file_content(url: str, predictions):
     assert response.text == ndjson.dumps(predictions)
 
 
-def test_delete(client, configured_project, predictions):
+def test_project_bulk_import_requests(client, configured_project, predictions):
+    result = configured_project.bulk_import_requests()
+    assert len(list(result)) == 0
+
+    name = str(uuid.uuid4())
+    bulk_import_request = configured_project.upload_annotations(
+        name=name, annotations=predictions)
+    bulk_import_request.wait_until_done()
 
-    id_param = "project_id"
-    query_str = """query bulk_import_requestsPyApi($%s: ID!) {bulkImportRequests(where: {projectId: $%s}) {id}}""" % (
-        id_param, id_param)
+    name = str(uuid.uuid4())
+    bulk_import_request = configured_project.upload_annotations(
+        name=name, annotations=predictions)
+    bulk_import_request.wait_until_done()
+
+    name = str(uuid.uuid4())
+    bulk_import_request = configured_project.upload_annotations(
+        name=name, annotations=predictions)
+    bulk_import_request.wait_until_done()
+
+    result = configured_project.bulk_import_requests()
+    assert len(list(result)) == 3
+
+
+def test_delete(client, configured_project, predictions):
     name = str(uuid.uuid4())
 
     bulk_import_request = configured_project.upload_annotations(
         name=name, annotations=predictions)
     bulk_import_request.wait_until_done()
-    all_import_requests = client.execute(query_str,
-                                         {id_param: configured_project.uid})
-    assert len(all_import_requests['bulkImportRequests']) == 1
+    all_import_requests = configured_project.bulk_import_requests()
+    assert len(list(all_import_requests)) == 1
 
     bulk_import_request.delete()
-    all_import_requests = client.execute(query_str,
-                                         {id_param: configured_project.uid})
-    assert len(all_import_requests['bulkImportRequests']) == 0
+    all_import_requests = configured_project.bulk_import_requests()
+    assert len(list(all_import_requests)) == 0
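
The tests count via len(list(...)) because a PaginatedCollection appears to be a lazy iterable that does not implement __len__, so counting requires exhausting it. A small sketch of the idiom, reusing the configured_project fixture above:

    # Inside a test: count results by materializing the lazy collection.
    all_import_requests = configured_project.bulk_import_requests()
    count = sum(1 for _ in all_import_requests)  # same effect as len(list(...))

    # The collection is now exhausted; fetch again before re-counting.
    assert count == len(list(configured_project.bulk_import_requests()))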

tests/integration/conftest.py

Lines changed: 17 additions & 10 deletions
@@ -10,6 +10,7 @@
 from types import SimpleNamespace
 
 import pytest
+import requests
 
 from labelbox import Client
 from labelbox import LabelingFrontend
@@ -30,7 +31,7 @@ class Environ(Enum):
     STAGING = 'staging'
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def environ() -> Environ:
     """
     Checks environment variables for LABELBOX_ENVIRON to be
@@ -127,11 +128,16 @@ def execute(self, query=None, params=None, check_naming=True, **kwargs):
         return super().execute(query, params, **kwargs)
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def client(environ: str):
     return IntegrationClient(environ)
 
 
+@pytest.fixture(scope="session")
+def image_url(client):
+    return client.upload_data(requests.get(IMG_URL).content, sign=True)
+
+
 @pytest.fixture
 def rand_gen():
 
@@ -187,10 +193,10 @@ def dataset(client, rand_gen):
 
 
 @pytest.fixture
-def datarow(dataset):
+def datarow(dataset, image_url):
     task = dataset.create_data_rows([
         {
-            "row_data": IMG_URL,
+            "row_data": image_url,
             "external_id": "my-image"
         },
     ])
@@ -204,10 +210,10 @@ def datarow(dataset):
 
 
 @pytest.fixture
-def label_pack(project, rand_gen):
+def label_pack(project, rand_gen, image_url):
     client = project.client
     dataset = client.create_dataset(name=rand_gen(str), projects=project)
-    data_row = dataset.create_data_row(row_data=IMG_URL)
+    data_row = dataset.create_data_row(row_data=image_url)
     label = project.create_label(data_row=data_row, label=rand_gen(str))
     yield LabelPack(project, dataset, data_row, label)
     dataset.delete()
@@ -276,9 +282,9 @@ def project_pack(client):
 
 
 @pytest.fixture
-def configured_project(project, client, rand_gen):
+def configured_project(project, client, rand_gen, image_url):
     dataset = client.create_dataset(name=rand_gen(str), projects=project)
-    dataset.create_data_row(row_data=IMG_URL)
+    dataset.create_data_row(row_data=image_url)
     editor = list(
         project.client.get_labeling_frontends(
             where=LabelingFrontend.name == "editor"))[0]
@@ -318,10 +324,11 @@ def submit(project_id, data_row_id):
 
 
 @pytest.fixture
-def configured_project_with_label(client, rand_gen, annotation_submit_fn):
+def configured_project_with_label(client, rand_gen, annotation_submit_fn,
+                                  image_url):
     project = client.create_project(name=rand_gen(str))
     dataset = client.create_dataset(name=rand_gen(str), projects=project)
-    data_row = dataset.create_data_row(row_data=IMG_URL)
+    data_row = dataset.create_data_row(row_data=image_url)
     editor = list(
         project.client.get_labeling_frontends(
             where=LabelingFrontend.name == "editor"))[0]
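
Bumping environ and client to scope="session" is what makes the session-scoped image_url fixture legal: pytest does not let a fixture depend on another with a narrower scope. A self-contained sketch of the caching behavior this buys (fixture and URL names are illustrative):

    import pytest

    @pytest.fixture(scope="session")
    def expensive_upload():
        print("runs once per test session")  # not once per test
        return "https://example.com/signed-url"  # stand-in for the real upload

    def test_one(expensive_upload):
        assert expensive_upload.startswith("https://")

    def test_two(expensive_upload):
        # Receives the cached value; the upload does not run again.
        assert expensive_upload.startswith("https://")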

tests/integration/test_data_row_metadata.py

Lines changed: 27 additions & 22 deletions
@@ -6,7 +6,6 @@
 from labelbox.schema.data_row_metadata import DataRowMetadataField, DataRowMetadata, DeleteDataRowMetadata, \
     DataRowMetadataOntology
 
-IMG_URL = "https://picsum.photos/id/829/200/300"
 FAKE_SCHEMA_ID = "0" * 25
 SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
 TRAIN_SPLIT_ID = "cko8sbscr0003h2dk04w86hof"
@@ -22,13 +21,13 @@ def mdo(client):
 
 
 @pytest.fixture
-def big_dataset(dataset: Dataset):
+def big_dataset(dataset: Dataset, image_url):
     task = dataset.create_data_rows([
         {
-            "row_data": IMG_URL,
+            "row_data": image_url,
             "external_id": "my-image"
         },
-    ] * 1000)
+    ] * 250)
     task.wait_till_done()
 
     yield dataset
@@ -97,35 +96,37 @@ def test_bulk_delete_datarow_metadata(datarow, mdo):
     mdo.bulk_upsert([metadata])
 
     assert len(datarow.metadata["fields"])
-
-    mdo.bulk_delete([
-        DeleteDataRowMetadata(data_row_id=datarow.uid,
-                              fields=[m.schema_id for m in metadata.fields])
-    ])
-
-    assert not len(datarow.metadata["fields"])
+    upload_ids = [m.schema_id for m in metadata.fields]
+    mdo.bulk_delete(
+        [DeleteDataRowMetadata(data_row_id=datarow.uid, fields=upload_ids)])
+    remaining_ids = set([f['schema_id'] for f in datarow.metadata["fields"]])
+    assert not len(remaining_ids.intersection(set(upload_ids)))
 
 
 def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
     """Delete a single from metadata"""
-    assert not len(datarow.metadata["fields"])
+    n_fields = len(datarow.metadata["fields"])
 
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
 
-    assert len(datarow.metadata["fields"])
+    assert len(datarow.metadata["fields"]) == (n_fields + 5)
 
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
     ])
 
-    assert len(datarow.metadata["fields"]) == 4
+    assert len(datarow.metadata["fields"]) == (n_fields + 4)
 
 
 @pytest.mark.slow
 def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     metadata = []
-    for dr in big_dataset.export_data_rows():
+    n_fields_start = 0
+    for idx, dr in enumerate(big_dataset.export_data_rows()):
+        if idx == 0:
+            n_fields_start = len(dr.metadata["fields"])
+
         metadata.append(
             DataRowMetadata(data_row_id=dr.uid,
                             fields=[
@@ -151,24 +152,26 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
     for dr in big_dataset.export_data_rows():
-        assert len(dr.metadata["fields"]) == 1
+        assert len(dr.metadata["fields"]) == 1 + n_fields_start
         break
 
 
 def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
     """test bulk deletes for non non fields"""
-    assert not len(datarow.metadata["fields"])
+    n_fields = len(datarow.metadata["fields"])
     metadata = make_metadata(datarow.uid)
     metadata.fields = [
         m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
     ]
     mdo.bulk_upsert([metadata])
-    assert len(datarow.metadata["fields"])
+    assert len(datarow.metadata["fields"]) == len(
+        set([x.schema_id for x in metadata.fields] +
+            [x['schema_id'] for x in datarow.metadata["fields"]]))
 
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
-    assert not len(datarow.metadata["fields"])
+    assert len(datarow.metadata["fields"]) == n_fields
 
 
 def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -196,7 +199,6 @@ def test_upsert_non_existent_schema_id(datarow, mdo):
 
 
 def test_delete_non_existent_schema_id(datarow, mdo):
-    assert not len(datarow.metadata["fields"])
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid,
                               fields=[EMBEDDING_SCHEMA_ID])
@@ -207,15 +209,18 @@ def test_delete_non_existent_schema_id(datarow, mdo):
 @pytest.mark.slow
 def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
     deletes = []
-    for dr in big_dataset.export_data_rows():
+    n_fields_start = 0
+    for idx, dr in enumerate(big_dataset.export_data_rows()):
+        if idx == 0:
+            n_fields_start = len(dr.metadata["fields"])
         deletes.append(
             DeleteDataRowMetadata(data_row_id=dr.uid,
                                   fields=[EMBEDDING_SCHEMA_ID]))
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
 
     for dr in big_dataset.export_data_rows():
-        assert not len(dr.metadata["fields"])
+        assert len(dr.metadata["fields"]) == n_fields_start
         break
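
The changes in this file all apply one pattern: rather than asserting that a data row starts with zero metadata fields, each test records the baseline count and asserts deltas, so the suite tolerates pre-existing metadata on shared rows. A distilled sketch, reusing this file's fixtures and helpers and assuming the upserted schema ids are not already present on the row:

    def test_upsert_then_delete_restores_baseline(datarow, mdo):
        # Record the baseline rather than assuming an empty slate.
        n_fields = len(datarow.metadata["fields"])

        metadata = make_metadata(datarow.uid)
        mdo.bulk_upsert([metadata])
        # Delta assertion: holds only if no upserted id was already present.
        assert len(datarow.metadata["fields"]) == n_fields + len(metadata.fields)

        mdo.bulk_delete([
            DeleteDataRowMetadata(data_row_id=datarow.uid,
                                  fields=[m.schema_id for m in metadata.fields])
        ])
        assert len(datarow.metadata["fields"]) == n_fields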
