Skip to content

Commit 8b00ee3

Browse files
authored
Merge pull request #259 from Labelbox/ms/test-fixes
Fix tests
2 parents ebc9bf5 + 9b6e18f commit 8b00ee3

File tree

7 files changed

+81
-79
lines changed

7 files changed

+81
-79
lines changed

tests/integration/bulk_import/conftest.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
from labelbox.schema.labeling_frontend import LabelingFrontend
77
from labelbox.schema.annotation_import import MALPredictionImport
88

9-
IMG_URL = "https://picsum.photos/200/300"
10-
119

1210
@pytest.fixture
1311
def ontology():
@@ -103,7 +101,7 @@ def ontology():
103101

104102

105103
@pytest.fixture
106-
def configured_project(client, ontology, rand_gen):
104+
def configured_project(client, ontology, rand_gen, image_url):
107105
project = client.create_project(name=rand_gen(str))
108106
dataset = client.create_dataset(name=rand_gen(str))
109107
editor = list(
@@ -112,7 +110,7 @@ def configured_project(client, ontology, rand_gen):
112110
project.setup(editor, ontology)
113111
data_row_ids = []
114112
for _ in range(len(ontology['tools']) + len(ontology['classifications'])):
115-
data_row_ids.append(dataset.create_data_row(row_data=IMG_URL).uid)
113+
data_row_ids.append(dataset.create_data_row(row_data=image_url).uid)
116114
project.datasets.connect(dataset)
117115
project.data_row_ids = data_row_ids
118116
yield project

tests/integration/conftest.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from types import SimpleNamespace
1111

1212
import pytest
13+
import requests
1314

1415
from labelbox import Client
1516
from labelbox import LabelingFrontend
@@ -30,7 +31,7 @@ class Environ(Enum):
3031
STAGING = 'staging'
3132

3233

33-
@pytest.fixture
34+
@pytest.fixture(scope="session")
3435
def environ() -> Environ:
3536
"""
3637
Checks environment variables for LABELBOX_ENVIRON to be
@@ -127,11 +128,16 @@ def execute(self, query=None, params=None, check_naming=True, **kwargs):
127128
return super().execute(query, params, **kwargs)
128129

129130

130-
@pytest.fixture
131+
@pytest.fixture(scope="session")
131132
def client(environ: str):
132133
return IntegrationClient(environ)
133134

134135

136+
@pytest.fixture(scope="session")
137+
def image_url(client):
138+
return client.upload_data(requests.get(IMG_URL).content, sign=True)
139+
140+
135141
@pytest.fixture
136142
def rand_gen():
137143

@@ -187,10 +193,10 @@ def dataset(client, rand_gen):
187193

188194

189195
@pytest.fixture
190-
def datarow(dataset):
196+
def datarow(dataset, image_url):
191197
task = dataset.create_data_rows([
192198
{
193-
"row_data": IMG_URL,
199+
"row_data": image_url,
194200
"external_id": "my-image"
195201
},
196202
])
@@ -204,10 +210,10 @@ def datarow(dataset):
204210

205211

206212
@pytest.fixture
207-
def label_pack(project, rand_gen):
213+
def label_pack(project, rand_gen, image_url):
208214
client = project.client
209215
dataset = client.create_dataset(name=rand_gen(str), projects=project)
210-
data_row = dataset.create_data_row(row_data=IMG_URL)
216+
data_row = dataset.create_data_row(row_data=image_url)
211217
label = project.create_label(data_row=data_row, label=rand_gen(str))
212218
yield LabelPack(project, dataset, data_row, label)
213219
dataset.delete()
@@ -276,9 +282,9 @@ def project_pack(client):
276282

277283

278284
@pytest.fixture
279-
def configured_project(project, client, rand_gen):
285+
def configured_project(project, client, rand_gen, image_url):
280286
dataset = client.create_dataset(name=rand_gen(str), projects=project)
281-
dataset.create_data_row(row_data=IMG_URL)
287+
dataset.create_data_row(row_data=image_url)
282288
editor = list(
283289
project.client.get_labeling_frontends(
284290
where=LabelingFrontend.name == "editor"))[0]
@@ -318,10 +324,11 @@ def submit(project_id, data_row_id):
318324

319325

320326
@pytest.fixture
321-
def configured_project_with_label(client, rand_gen, annotation_submit_fn):
327+
def configured_project_with_label(client, rand_gen, annotation_submit_fn,
328+
image_url):
322329
project = client.create_project(name=rand_gen(str))
323330
dataset = client.create_dataset(name=rand_gen(str), projects=project)
324-
data_row = dataset.create_data_row(row_data=IMG_URL)
331+
data_row = dataset.create_data_row(row_data=image_url)
325332
editor = list(
326333
project.client.get_labeling_frontends(
327334
where=LabelingFrontend.name == "editor"))[0]

tests/integration/test_data_row_metadata.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from labelbox.schema.data_row_metadata import DataRowMetadataField, DataRowMetadata, DeleteDataRowMetadata, \
77
DataRowMetadataOntology
88

9-
IMG_URL = "https://picsum.photos/id/829/200/300"
109
FAKE_SCHEMA_ID = "0" * 25
1110
SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
1211
TRAIN_SPLIT_ID = "cko8sbscr0003h2dk04w86hof"
@@ -22,13 +21,13 @@ def mdo(client):
2221

2322

2423
@pytest.fixture
25-
def big_dataset(dataset: Dataset):
24+
def big_dataset(dataset: Dataset, image_url):
2625
task = dataset.create_data_rows([
2726
{
28-
"row_data": IMG_URL,
27+
"row_data": image_url,
2928
"external_id": "my-image"
3029
},
31-
] * 1000)
30+
] * 250)
3231
task.wait_till_done()
3332

3433
yield dataset
@@ -97,35 +96,37 @@ def test_bulk_delete_datarow_metadata(datarow, mdo):
9796
mdo.bulk_upsert([metadata])
9897

9998
assert len(datarow.metadata["fields"])
100-
101-
mdo.bulk_delete([
102-
DeleteDataRowMetadata(data_row_id=datarow.uid,
103-
fields=[m.schema_id for m in metadata.fields])
104-
])
105-
106-
assert not len(datarow.metadata["fields"])
99+
upload_ids = [m.schema_id for m in metadata.fields]
100+
mdo.bulk_delete(
101+
[DeleteDataRowMetadata(data_row_id=datarow.uid, fields=upload_ids)])
102+
remaining_ids = set([f['schema_id'] for f in datarow.metadata["fields"]])
103+
assert not len(remaining_ids.intersection(set(upload_ids)))
107104

108105

109106
def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
110107
"""Delete a single from metadata"""
111-
assert not len(datarow.metadata["fields"])
108+
n_fields = len(datarow.metadata["fields"])
112109

113110
metadata = make_metadata(datarow.uid)
114111
mdo.bulk_upsert([metadata])
115112

116-
assert len(datarow.metadata["fields"])
113+
assert len(datarow.metadata["fields"]) == (n_fields + 5)
117114

118115
mdo.bulk_delete([
119116
DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
120117
])
121118

122-
assert len(datarow.metadata["fields"]) == 4
119+
assert len(datarow.metadata["fields"]) == (n_fields + 4)
123120

124121

125122
@pytest.mark.slow
126123
def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
127124
metadata = []
128-
for dr in big_dataset.export_data_rows():
125+
n_fields_start = 0
126+
for idx, dr in enumerate(big_dataset.export_data_rows()):
127+
if idx == 0:
128+
n_fields_start = len(dr.metadata["fields"])
129+
129130
metadata.append(
130131
DataRowMetadata(data_row_id=dr.uid,
131132
fields=[
@@ -151,24 +152,26 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
151152
errors = mdo.bulk_delete(deletes)
152153
assert len(errors) == 0
153154
for dr in big_dataset.export_data_rows():
154-
assert len(dr.metadata["fields"]) == 1
155+
assert len(dr.metadata["fields"]) == 1 + n_fields_start
155156
break
156157

157158

158159
def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
159160
"""test bulk deletes for non non fields"""
160-
assert not len(datarow.metadata["fields"])
161+
n_fields = len(datarow.metadata["fields"])
161162
metadata = make_metadata(datarow.uid)
162163
metadata.fields = [
163164
m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
164165
]
165166
mdo.bulk_upsert([metadata])
166-
assert len(datarow.metadata["fields"])
167+
assert len(datarow.metadata["fields"]) == len(
168+
set([x.schema_id for x in metadata.fields] +
169+
[x['schema_id'] for x in datarow.metadata["fields"]]))
167170

168171
mdo.bulk_delete([
169172
DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
170173
])
171-
assert not len(datarow.metadata["fields"])
174+
assert len(datarow.metadata["fields"]) == n_fields
172175

173176

174177
def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -196,7 +199,6 @@ def test_upsert_non_existent_schema_id(datarow, mdo):
196199

197200

198201
def test_delete_non_existent_schema_id(datarow, mdo):
199-
assert not len(datarow.metadata["fields"])
200202
mdo.bulk_delete([
201203
DeleteDataRowMetadata(data_row_id=datarow.uid,
202204
fields=[EMBEDDING_SCHEMA_ID])
@@ -207,15 +209,18 @@ def test_delete_non_existent_schema_id(datarow, mdo):
207209
@pytest.mark.slow
208210
def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
209211
deletes = []
210-
for dr in big_dataset.export_data_rows():
212+
n_fields_start = 0
213+
for idx, dr in enumerate(big_dataset.export_data_rows()):
214+
if idx == 0:
215+
n_fields_start = len(dr.metadata["fields"])
211216
deletes.append(
212217
DeleteDataRowMetadata(data_row_id=dr.uid,
213218
fields=[EMBEDDING_SCHEMA_ID]))
214219
errors = mdo.bulk_delete(deletes)
215220
assert len(errors) == 0
216221

217222
for dr in big_dataset.export_data_rows():
218-
assert not len(dr.metadata["fields"])
223+
assert len(dr.metadata["fields"]) == n_fields_start
219224
break
220225

221226

tests/integration/test_data_rows.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,22 @@
66
from labelbox import DataRow
77
from labelbox.exceptions import InvalidQueryError
88

9-
IMG_URL = "https://picsum.photos/id/829/200/300"
10-
119

1210
def test_get_data_row(datarow, client):
1311
assert client.get_data_row(datarow.uid)
1412

1513

16-
def test_data_row_bulk_creation(dataset, rand_gen):
14+
def test_data_row_bulk_creation(dataset, rand_gen, image_url):
1715
client = dataset.client
1816
assert len(list(dataset.data_rows())) == 0
1917

2018
# Test creation using URL
2119
task = dataset.create_data_rows([
2220
{
23-
DataRow.row_data: IMG_URL
21+
DataRow.row_data: image_url
2422
},
2523
{
26-
"row_data": IMG_URL
24+
"row_data": image_url
2725
},
2826
])
2927
assert task in client.get_user().created_tasks()
@@ -35,7 +33,7 @@ def test_data_row_bulk_creation(dataset, rand_gen):
3533

3634
data_rows = list(dataset.data_rows())
3735
assert len(data_rows) == 2
38-
assert {data_row.row_data for data_row in data_rows} == {IMG_URL}
36+
assert {data_row.row_data for data_row in data_rows} == {image_url}
3937

4038
# Test creation using file name
4139
with NamedTemporaryFile() as fp:
@@ -48,38 +46,38 @@ def test_data_row_bulk_creation(dataset, rand_gen):
4846

4947
data_rows = list(dataset.data_rows())
5048
assert len(data_rows) == 3
51-
url = ({data_row.row_data for data_row in data_rows} - {IMG_URL}).pop()
49+
url = ({data_row.row_data for data_row in data_rows} - {image_url}).pop()
5250
assert requests.get(url).content == data
5351

5452
data_rows[0].delete()
5553

5654

5755
@pytest.mark.slow
58-
def test_data_row_large_bulk_creation(dataset, rand_gen):
56+
def test_data_row_large_bulk_creation(dataset, image_url):
5957
# Do a longer task and expect it not to be complete immediately
6058
with NamedTemporaryFile() as fp:
6159
fp.write("Test data".encode())
6260
fp.flush()
6361
task = dataset.create_data_rows([{
64-
DataRow.row_data: IMG_URL
65-
}] * 4500 + [fp.name] * 500)
62+
DataRow.row_data: image_url
63+
}] * 750 + [fp.name] * 250)
6664
assert task.status == "IN_PROGRESS"
67-
task.wait_till_done()
65+
task.wait_till_done(timeout_seconds=120)
6866
assert task.status == "COMPLETE"
6967
data_rows = len(list(dataset.data_rows())) == 5003
7068

7169

7270
@pytest.mark.xfail(reason="DataRow.dataset() relationship not set")
73-
def test_data_row_single_creation(dataset, rand_gen):
71+
def test_data_row_single_creation(dataset, rand_gen, image_url):
7472
client = dataset.client
7573
assert len(list(dataset.data_rows())) == 0
7674

77-
data_row = dataset.create_data_row(row_data=IMG_URL)
75+
data_row = dataset.create_data_row(row_data=image_url)
7876
assert len(list(dataset.data_rows())) == 1
7977
assert data_row.dataset() == dataset
8078
assert data_row.created_by() == client.get_user()
8179
assert data_row.organization() == client.get_organization()
82-
assert requests.get(IMG_URL).content == \
80+
assert requests.get(image_url).content == \
8381
requests.get(data_row.row_data).content
8482
assert data_row.media_attributes is not None
8583

@@ -92,9 +90,9 @@ def test_data_row_single_creation(dataset, rand_gen):
9290
assert requests.get(data_row_2.row_data).content == data
9391

9492

95-
def test_data_row_update(dataset, rand_gen):
93+
def test_data_row_update(dataset, rand_gen, image_url):
9694
external_id = rand_gen(str)
97-
data_row = dataset.create_data_row(row_data=IMG_URL,
95+
data_row = dataset.create_data_row(row_data=image_url,
9896
external_id=external_id)
9997
assert data_row.external_id == external_id
10098

@@ -103,14 +101,14 @@ def test_data_row_update(dataset, rand_gen):
103101
assert data_row.external_id == external_id_2
104102

105103

106-
def test_data_row_filtering_sorting(dataset, rand_gen):
104+
def test_data_row_filtering_sorting(dataset, image_url):
107105
task = dataset.create_data_rows([
108106
{
109-
DataRow.row_data: IMG_URL,
107+
DataRow.row_data: image_url,
110108
DataRow.external_id: "row1"
111109
},
112110
{
113-
DataRow.row_data: IMG_URL,
111+
DataRow.row_data: image_url,
114112
DataRow.external_id: "row2"
115113
},
116114
])
@@ -133,9 +131,9 @@ def test_data_row_filtering_sorting(dataset, rand_gen):
133131
dataset.data_rows(order_by=DataRow.external_id.desc)) == [row2, row1]
134132

135133

136-
def test_data_row_deletion(dataset, rand_gen):
134+
def test_data_row_deletion(dataset, image_url):
137135
task = dataset.create_data_rows([{
138-
DataRow.row_data: IMG_URL,
136+
DataRow.row_data: image_url,
139137
DataRow.external_id: str(i)
140138
} for i in range(10)])
141139
task.wait_till_done()
@@ -159,13 +157,13 @@ def test_data_row_deletion(dataset, rand_gen):
159157
assert {dr.external_id for dr in data_rows} == expected
160158

161159

162-
def test_data_row_iteration(dataset, rand_gen) -> None:
160+
def test_data_row_iteration(dataset, image_url) -> None:
163161
task = dataset.create_data_rows([
164162
{
165-
DataRow.row_data: IMG_URL
163+
DataRow.row_data: image_url
166164
},
167165
{
168-
"row_data": IMG_URL
166+
"row_data": image_url
169167
},
170168
])
171169
task.wait_till_done()

0 commit comments (0)