Commit 714f7b1

Revert "[PLT-1107] Add validation for empty row_data for Dataset create_data_rows (#1667)" (#1670)
2 parents e23c301 + a645939 commit 714f7b1
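
For context, this revert returns Dataset.create_data_rows to its pre-#1667 behavior: items whose row_data is empty are no longer rejected by a create-specific client-side check, only by the generic empty-payload check. A minimal usage sketch of the affected call, assuming a configured client; the API key and dataset ID are placeholders, not values from this commit:

import labelbox as lb

client = lb.Client(api_key="YOUR_API_KEY")    # placeholder key
dataset = client.get_dataset("<dataset_id>")  # placeholder dataset ID

# After the revert, create_data_rows() builds DataRowUpsertItem specs again and
# relies on the generic empty-payload check rather than a row_data-specific one.
task = dataset.create_data_rows([
    {"row_data": "https://example.com/image_01.jpg", "global_key": "img_01"},
])
task.wait_till_done()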

File tree

5 files changed, +17 -156 lines changed


libs/labelbox/src/labelbox/schema/dataset.py

Lines changed: 4 additions & 5 deletions
@@ -32,9 +32,7 @@
 from labelbox.schema.task import Task, DataUpsertTask
 from labelbox.schema.user import User
 from labelbox.schema.iam_integration import IAMIntegration
-from labelbox.schema.internal.data_row_upsert_item import (DataRowItemBase,
-                                                            DataRowUpsertItem,
-                                                            DataRowCreateItem)
+from labelbox.schema.internal.data_row_upsert_item import (DataRowUpsertItem)
 import labelbox.schema.internal.data_row_uploader as data_row_uploader
 from labelbox.schema.internal.descriptor_file_creator import DescriptorFileCreator
 from labelbox.schema.internal.datarow_upload_constants import (
@@ -290,9 +288,10 @@ def create_data_rows(self,
         string_items = [item for item in items if isinstance(item, str)]
         dict_items = [item for item in items if isinstance(item, dict)]
         dict_string_items = []
+
         if len(string_items) > 0:
             dict_string_items = self._build_from_local_paths(string_items)
-        specs = DataRowCreateItem.build(self.uid,
+        specs = DataRowUpsertItem.build(self.uid,
                                         dict_items + dict_string_items)
         return self._exec_upsert_data_rows(specs, file_upload_thread_count)
@@ -614,7 +613,7 @@ def upsert_data_rows(self,

     def _exec_upsert_data_rows(
             self,
-            specs: List[DataRowItemBase],
+            specs: List[DataRowUpsertItem],
             file_upload_thread_count: int = FILE_UPLOAD_THREAD_COUNT
     ) -> "DataUpsertTask":
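With DataRowCreateItem gone, both the create and the upsert flow build the same DataRowUpsertItem specs and funnel through _exec_upsert_data_rows. A hedged sketch of the upsert side, reusing the `dataset` object from the sketch above; the URLs and global keys are placeholders, and the item shape follows the documented upsert_data_rows format rather than anything shown in this diff:

from labelbox.schema.identifiable import GlobalKey

task = dataset.upsert_data_rows([
    # No key: the item is treated as a new data row.
    {"row_data": "https://example.com/image_02.jpg", "global_key": "img_02"},
    # Key present: the existing data row identified by the global key is updated.
    {"key": GlobalKey("img_01"), "row_data": "https://example.com/image_01_v2.jpg"},
])
task.wait_till_done()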

libs/labelbox/src/labelbox/schema/internal/data_row_uploader.py

Lines changed: 3 additions & 8 deletions
@@ -3,7 +3,7 @@
 from typing import List

 from labelbox import pydantic_compat
-from labelbox.schema.internal.data_row_upsert_item import DataRowItemBase, DataRowUpsertItem, DataRowCreateItem
+from labelbox.schema.internal.data_row_upsert_item import DataRowUpsertItem
 from labelbox.schema.internal.descriptor_file_creator import DescriptorFileCreator


@@ -16,19 +16,14 @@ class UploadManifest(pydantic_compat.BaseModel):
     SOURCE_SDK = "SDK"


-def upload_in_chunks(client, specs: List[DataRowItemBase],
+def upload_in_chunks(client, specs: List[DataRowUpsertItem],
                      file_upload_thread_count: int,
                      max_chunk_size_bytes: int) -> UploadManifest:
     empty_specs = list(filter(lambda spec: spec.is_empty(), specs))

     if empty_specs:
         ids = list(map(lambda spec: spec.id.get("value"), empty_specs))
-        ids = list(filter(lambda x: x is not None and len(x) > 0, ids))
-        if len(ids) > 0:
-            raise ValueError(
-                f"The following items have an empty payload: {ids}")
-        else:  # case of create items
-            raise ValueError("Some items have an empty payload")
+        raise ValueError(f"The following items have an empty payload: {ids}")

     chunk_uris = DescriptorFileCreator(client).create(
         specs, max_chunk_size_bytes=max_chunk_size_bytes)
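
The revert collapses the two error branches back into the single message keyed by spec id. A standalone illustration of the surviving check, using plain dicts in place of the SDK's spec objects; the names and values here are invented for illustration and are not SDK code:

from typing import List

def check_empty_specs(specs: List[dict]) -> None:
    """Mimics the restored check in upload_in_chunks: any spec whose payload is
    empty (or only carries dataset_id) is reported by its id value."""
    def is_empty(spec: dict) -> bool:
        payload = spec["payload"]
        return not payload or (len(payload) == 1 and "dataset_id" in payload)

    empty_specs = [spec for spec in specs if is_empty(spec)]
    if empty_specs:
        ids = [spec["id"].get("value") for spec in empty_specs]
        raise ValueError(f"The following items have an empty payload: {ids}")

check_empty_specs([
    {"id": {"value": "ck_abc"}, "payload": {"row_data": "https://example.com/a.jpg"}},
])  # passes; a spec with an empty payload would raise ValueError instead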

libs/labelbox/src/labelbox/schema/internal/data_row_upsert_item.py

Lines changed: 3 additions & 48 deletions
@@ -1,32 +1,23 @@
-from abc import ABC, abstractmethod
-
 from typing import List, Tuple, Optional

-from labelbox.pydantic_compat import BaseModel
 from labelbox.schema.identifiable import UniqueId, GlobalKey
-from labelbox import pydantic_compat
-from labelbox.schema.data_row import DataRow
+from labelbox.pydantic_compat import BaseModel


-class DataRowItemBase(ABC, pydantic_compat.BaseModel):
+class DataRowUpsertItem(BaseModel):
     """
     Base class for creating payloads for upsert operations.
     """
-
     id: dict
     payload: dict

-    @abstractmethod
-    def is_empty(self) -> bool:
-        ...
-
     @classmethod
     def build(
         cls,
         dataset_id: str,
         items: List[dict],
         key_types: Optional[Tuple[type, ...]] = ()
-    ) -> List["DataRowItemBase"]:
+    ) -> List["DataRowUpsertItem"]:
         upload_items = []

         for item in items:
@@ -50,46 +41,10 @@ def build(
             upload_items.append(cls(payload=item, id=key))
         return upload_items

-
-class DataRowUpsertItem(DataRowItemBase):
-
     def is_empty(self) -> bool:
         """
         The payload is considered empty if it's actually empty or the only key is `dataset_id`.
         :return: bool
         """
         return (not self.payload or
                 len(self.payload.keys()) == 1 and "dataset_id" in self.payload)
-
-    @classmethod
-    def build(
-        cls,
-        dataset_id: str,
-        items: List[dict],
-        key_types: Optional[Tuple[type, ...]] = (UniqueId, GlobalKey)
-    ) -> List["DataRowItemBase"]:
-        return super().build(dataset_id, items, (UniqueId, GlobalKey))
-
-
-class DataRowCreateItem(DataRowItemBase):
-
-    def is_empty(self) -> bool:
-        """
-        The payload is considered empty if it's actually empty or row_data is empty
-        or the only key is `dataset_id`.
-        :return: bool
-        """
-        row_data = self.payload.get("row_data", None) or self.payload.get(
-            DataRow.row_data, None)
-        return (not self.payload or len(self.payload.keys()) == 1 and
-                "dataset_id" in self.payload or row_data is None or
-                len(row_data) == 0)
-
-    @classmethod
-    def build(
-        cls,
-        dataset_id: str,
-        items: List[dict],
-        key_types: Optional[Tuple[type, ...]] = ()
-    ) -> List["DataRowItemBase"]:
-        return super().build(dataset_id, items, ())
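
After the revert there is a single spec class again, with the allowed identifier key types passed explicitly to build(). A hedged sketch of how the two call sites use it; this touches an internal module (labelbox.schema.internal.*), and the example items are invented, mirroring the unit tests further down this page:

from labelbox.schema.internal.data_row_upsert_item import DataRowUpsertItem
from labelbox.schema.identifiable import UniqueId, GlobalKey

create_items = [{"row_data": "https://example.com/img_01.jpg"}]

# Create path (Dataset.create_data_rows): no identifier key types are allowed.
create_specs = DataRowUpsertItem.build("test_dataset", create_items, ())

# Upsert path (Dataset.upsert_data_rows): UniqueId / GlobalKey keys are accepted.
upsert_specs = DataRowUpsertItem.build("test_dataset", create_items,
                                       (UniqueId, GlobalKey))

assert all(not spec.is_empty() for spec in create_specs + upsert_specs)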

libs/labelbox/src/labelbox/schema/internal/descriptor_file_creator.py

Lines changed: 3 additions & 3 deletions
@@ -13,7 +13,7 @@
 from labelbox.schema.embedding import EmbeddingVector
 from labelbox.schema.internal.datarow_upload_constants import (
     FILE_UPLOAD_THREAD_COUNT)
-from labelbox.schema.internal.data_row_upsert_item import DataRowItemBase, DataRowUpsertItem
+from labelbox.schema.internal.data_row_upsert_item import DataRowUpsertItem

 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
@@ -248,7 +248,7 @@ def format_legacy_conversational_data(item):
         return item

     def convert_item(data_row_item):
-        if isinstance(data_row_item, DataRowItemBase):
+        if isinstance(data_row_item, DataRowUpsertItem):
             item = data_row_item.payload
         else:
             item = data_row_item
@@ -273,7 +273,7 @@ def convert_item(data_row_item):
         # Upload any local file paths
         item = upload_if_necessary(item)

-        if isinstance(data_row_item, DataRowItemBase):
+        if isinstance(data_row_item, DataRowUpsertItem):
             return {'id': data_row_item.id, 'payload': item}
         else:
             return item
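
For clarity, a standalone sketch of the isinstance dispatch in convert_item: wrapped specs are unwrapped to their payload and re-wrapped with their id, while plain dicts pass through unchanged. The class below is a stand-in, not the SDK's DataRowUpsertItem, and the real code also performs per-item normalization (attachments, embeddings, local files) that is omitted here:

from dataclasses import dataclass

@dataclass
class UpsertItemStub:  # stand-in for DataRowUpsertItem
    id: dict
    payload: dict

def convert_item(data_row_item):
    # Unwrap spec objects to their payload; leave plain dicts as-is.
    item = data_row_item.payload if isinstance(data_row_item, UpsertItemStub) else data_row_item
    # ... per-item normalization would happen here ...
    if isinstance(data_row_item, UpsertItemStub):
        return {'id': data_row_item.id, 'payload': item}
    return item

print(convert_item({"row_data": "https://example.com/a.jpg"}))
print(convert_item(UpsertItemStub(id={"value": "ck_abc"}, payload={"row_data": "x"})))
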
Lines changed: 4 additions & 92 deletions
@@ -1,13 +1,7 @@
-from unittest.mock import MagicMock, patch
-
 import pytest
-from labelbox.schema.internal.data_row_upsert_item import (DataRowUpsertItem,
-                                                            DataRowCreateItem)
+from labelbox.schema.internal.data_row_upsert_item import (DataRowUpsertItem)
 from labelbox.schema.identifiable import UniqueId, GlobalKey
 from labelbox.schema.asset_attachment import AttachmentType
-from labelbox.schema.dataset import Dataset
-from labelbox.schema.internal.descriptor_file_creator import DescriptorFileCreator
-from labelbox.schema.data_row import DataRow


 @pytest.fixture
@@ -32,17 +26,6 @@ def data_row_create_items():
     return dataset_id, items


-@pytest.fixture
-def data_row_create_items_row_data_none():
-    dataset_id = 'test_dataset'
-    items = [
-        {
-            "row_data": None,
-        },
-    ]
-    return dataset_id, items
-
-
 @pytest.fixture
 def data_row_update_items():
     dataset_id = 'test_dataset'
@@ -63,15 +46,15 @@ def test_data_row_upsert_items(data_row_create_items, data_row_update_items):
     dataset_id, create_items = data_row_create_items
     dataset_id, update_items = data_row_update_items
     items = create_items + update_items
-    result = DataRowUpsertItem.build(dataset_id, items)
+    result = DataRowUpsertItem.build(dataset_id, items, (UniqueId, GlobalKey))
     assert len(result) == len(items)
     for item, res in zip(items, result):
         assert res.payload == item


 def test_data_row_create_items(data_row_create_items):
     dataset_id, items = data_row_create_items
-    result = DataRowCreateItem.build(dataset_id, items)
+    result = DataRowUpsertItem.build(dataset_id, items)
     assert len(result) == len(items)
     for item, res in zip(items, result):
         assert res.payload == item
@@ -80,75 +63,4 @@ def test_data_row_create_items(data_row_create_items):
 def test_data_row_create_items_not_updateable(data_row_update_items):
     dataset_id, items = data_row_update_items
     with pytest.raises(ValueError):
-        DataRowCreateItem.build(dataset_id, items)
-
-
-def test_upsert_is_empty():
-    item = DataRowUpsertItem(id={
-        "id": UniqueId,
-        "value": UniqueId("123")
-    },
-                             payload={})
-    assert item.is_empty()
-
-    item = DataRowUpsertItem(id={
-        "id": UniqueId,
-        "value": UniqueId("123")
-    },
-                             payload={"dataset_id": "test_dataset"})
-    assert item.is_empty()
-
-    item = DataRowUpsertItem(
-        id={}, payload={"row_data": "http://my_site.com/photos/img_01.jpg"})
-    assert not item.is_empty()
-
-
-def test_create_is_empty():
-    item = DataRowCreateItem(id={}, payload={})
-    assert item.is_empty()
-
-    item = DataRowCreateItem(id={}, payload={"dataset_id": "test_dataset"})
-    assert item.is_empty()
-
-    item = DataRowCreateItem(id={}, payload={"row_data": None})
-    assert item.is_empty()
-
-    item = DataRowCreateItem(id={}, payload={"row_data": ""})
-    assert item.is_empty()
-
-    item = DataRowCreateItem(
-        id={}, payload={"row_data": "http://my_site.com/photos/img_01.jpg"})
-    assert not item.is_empty()
-
-    item = DataRowCreateItem(
-        id={},
-        payload={DataRow.row_data: "http://my_site.com/photos/img_01.jpg"})
-    assert not item.is_empty()
-
-
-def test_create_row_data_none():
-    items = [
-        {
-            "row_data": None,
-            "global_key": "global_key1",
-        },
-    ]
-    client = MagicMock()
-    dataset = Dataset(
-        client, {
-            "id": 'test_dataset',
-            "name": 'test_dataset',
-            "createdAt": "2021-06-01T00:00:00.000Z",
-            "description": "test_dataset",
-            "updatedAt": "2021-06-01T00:00:00.000Z",
-            "rowCount": 0,
-        })
-
-    with patch.object(DescriptorFileCreator,
-                      'create',
-                      return_value=["http://bar.com/chunk_uri"]):
-        with pytest.raises(ValueError,
-                           match="Some items have an empty payload"):
-            dataset.create_data_rows(items)
-
-    client.execute.assert_not_called()
+        DataRowUpsertItem.build(dataset_id, items, ())
