Skip to content

Commit 326bceb

Browse files
author
Val Brodsky
committed
PR feedback: refactor DataRowUpsertItem
1 parent 79d3d0c commit 326bceb

File tree

4 files changed

+40
-43
lines changed

4 files changed

+40
-43
lines changed

libs/labelbox/src/labelbox/schema/dataset.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,7 @@
3232
from labelbox.schema.task import Task, DataUpsertTask
3333
from labelbox.schema.user import User
3434
from labelbox.schema.iam_integration import IAMIntegration
35-
from labelbox.schema.internal.data_row_create_upsert import (DataRowItemBase,
36-
DataRowUpsertItem,
37-
DataRowCreateItem)
35+
from labelbox.schema.internal.data_row_upsert_item import (DataRowUpsertItem)
3836
from labelbox.schema.internal.data_row_uploader import DataRowUploader
3937
from labelbox.schema.internal.datarow_upload_constants import (
4038
MAX_DATAROW_PER_API_OPERATION, FILE_UPLOAD_THREAD_COUNT, UPSERT_CHUNK_SIZE)
@@ -290,7 +288,7 @@ def create_data_rows(self,
290288

291289
if len(string_items) > 0:
292290
dict_string_items = self._build_from_local_paths(string_items)
293-
specs = DataRowCreateItem.build(self.uid,
291+
specs = DataRowUpsertItem.build(self.uid,
294292
dict_items + dict_string_items)
295293
return self._exec_upsert_data_rows(specs, file_upload_thread_count)
296294

@@ -607,12 +605,12 @@ def upsert_data_rows(self,
607605
>>> ])
608606
>>> task.wait_till_done()
609607
"""
610-
specs = DataRowUpsertItem.build(self.uid, items)
608+
specs = DataRowUpsertItem.build(self.uid, items, (UniqueId, GlobalKey))
611609
return self._exec_upsert_data_rows(specs, file_upload_thread_count)
612610

613611
def _exec_upsert_data_rows(
614612
self,
615-
specs: List[DataRowItemBase],
613+
specs: List[DataRowUpsertItem],
616614
file_upload_thread_count: int = FILE_UPLOAD_THREAD_COUNT
617615
) -> "DataUpsertTask":
618616

libs/labelbox/src/labelbox/schema/internal/data_row_uploader.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
from labelbox.orm.model import Entity
1111
from labelbox.orm.model import Field
1212
from labelbox.schema.embedding import EmbeddingVector
13-
from labelbox.schema.internal.data_row_create_upsert import DataRowItemBase
1413
from labelbox.schema.internal.datarow_upload_constants import MAX_DATAROW_PER_API_OPERATION
14+
from labelbox.schema.internal.data_row_upsert_item import DataRowUpsertItem
1515

1616

1717
class UploadManifest:
@@ -213,7 +213,7 @@ def formatLegacyConversationalData(item):
213213
return item
214214

215215
def convert_item(data_row_item):
216-
if isinstance(data_row_item, DataRowItemBase):
216+
if isinstance(data_row_item, DataRowUpsertItem):
217217
item = data_row_item.payload
218218
else:
219219
item = data_row_item
@@ -238,7 +238,7 @@ def convert_item(data_row_item):
238238
# Upload any local file paths
239239
item = upload_if_necessary(item)
240240

241-
if isinstance(data_row_item, DataRowItemBase):
241+
if isinstance(data_row_item, DataRowUpsertItem):
242242
return {'id': data_row_item.id, 'payload': item}
243243
else:
244244
return item
@@ -263,7 +263,7 @@ def convert_item(data_row_item):
263263
filename="json_import.json")
264264

265265
@staticmethod
266-
def upload_in_chunks(client, specs: List[DataRowItemBase],
266+
def upload_in_chunks(client, specs: List[DataRowUpsertItem],
267267
file_upload_thread_count: int,
268268
upsert_chunk_size: int) -> UploadManifest:
269269
empty_specs = list(filter(lambda spec: spec.is_empty(), specs))

libs/labelbox/src/labelbox/schema/internal/data_row_create_upsert.py renamed to libs/labelbox/src/labelbox/schema/internal/data_row_upsert_item.py

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
1-
from abc import ABC, abstractmethod
21
from typing import List, Tuple, Optional
32

43
from labelbox.schema.identifiable import UniqueId, GlobalKey
54
from labelbox.pydantic_compat import BaseModel
65

76

8-
class DataRowItemBase(BaseModel, ABC):
7+
class DataRowUpsertItem(BaseModel):
98
"""
109
Base class for creating payloads for upsert operations.
1110
"""
1211
id: dict
1312
payload: dict
1413

1514
@classmethod
16-
@abstractmethod
1715
def build(
1816
cls,
1917
dataset_id: str,
@@ -52,25 +50,24 @@ def is_empty(self) -> bool:
5250
len(self.payload.keys()) == 1 and "dataset_id" in self.payload)
5351

5452

55-
class DataRowUpsertItem(DataRowItemBase):
56-
57-
@classmethod
58-
def build(
59-
cls,
60-
dataset_id: str,
61-
items: List[dict],
62-
key_types: Optional[Tuple[type, ...]] = ()
63-
) -> List["DataRowItemBase"]:
64-
return super().build(dataset_id, items, (UniqueId, GlobalKey))
53+
# class DataRowUpsertItem(DataRowItemBase):
6554

55+
# @classmethod
56+
# def build(
57+
# cls,
58+
# dataset_id: str,
59+
# items: List[dict],
60+
# key_types: Optional[Tuple[type, ...]] = ()
61+
# ) -> List["DataRowItemBase"]:
62+
# return super().build(dataset_id, items, (UniqueId, GlobalKey))
6663

67-
class DataRowCreateItem(DataRowItemBase):
64+
# class DataRowCreateItem(DataRowItemBase):
6865

69-
@classmethod
70-
def build(
71-
cls,
72-
dataset_id: str,
73-
items: List[dict],
74-
key_types: Optional[Tuple[type, ...]] = ()
75-
) -> List["DataRowItemBase"]:
76-
return super().build(dataset_id, items, ())
66+
# @classmethod
67+
# def build(
68+
# cls,
69+
# dataset_id: str,
70+
# items: List[dict],
71+
# key_types: Optional[Tuple[type, ...]] = ()
72+
# ) -> List["DataRowItemBase"]:
73+
# return super().build(dataset_id, items, ())

libs/labelbox/tests/unit/test_data_row_upsert_data.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import pytest
2-
from labelbox.schema.internal.data_row_create_upsert import (DataRowUpsertItem,
3-
DataRowCreateItem)
2+
from labelbox.schema.internal.data_row_upsert_item import (DataRowUpsertItem)
43
from labelbox.schema.identifiable import UniqueId, GlobalKey
54
from labelbox.schema.asset_attachment import AttachmentType
65

@@ -13,12 +12,15 @@ def data_row_create_items():
1312
"row_data": "http://my_site.com/photos/img_01.jpg",
1413
"global_key": "global_key1",
1514
"external_id": "ex_id1",
16-
"attachments": [
17-
{"type": AttachmentType.RAW_TEXT, "name": "att1", "value": "test1"}
18-
],
19-
"metadata": [
20-
{"name": "tag", "value": "tag value"},
21-
]
15+
"attachments": [{
16+
"type": AttachmentType.RAW_TEXT,
17+
"name": "att1",
18+
"value": "test1"
19+
}],
20+
"metadata": [{
21+
"name": "tag",
22+
"value": "tag value"
23+
},]
2224
},
2325
]
2426
return dataset_id, items
@@ -44,15 +46,15 @@ def test_data_row_upsert_items(data_row_create_items, data_row_update_items):
4446
dataset_id, create_items = data_row_create_items
4547
dataset_id, update_items = data_row_update_items
4648
items = create_items + update_items
47-
result = DataRowUpsertItem.build(dataset_id, items)
49+
result = DataRowUpsertItem.build(dataset_id, items, (UniqueId, GlobalKey))
4850
assert len(result) == len(items)
4951
for item, res in zip(items, result):
5052
assert res.payload == item
5153

5254

5355
def test_data_row_create_items(data_row_create_items):
5456
dataset_id, items = data_row_create_items
55-
result = DataRowCreateItem.build(dataset_id, items)
57+
result = DataRowUpsertItem.build(dataset_id, items)
5658
assert len(result) == len(items)
5759
for item, res in zip(items, result):
5860
assert res.payload == item
@@ -61,4 +63,4 @@ def test_data_row_create_items(data_row_create_items):
6163
def test_data_row_create_items_not_updateable(data_row_update_items):
6264
dataset_id, items = data_row_update_items
6365
with pytest.raises(ValueError):
64-
DataRowCreateItem.build(dataset_id, items)
66+
DataRowUpsertItem.build(dataset_id, items, ())

0 commit comments

Comments
 (0)