Skip to content

Commit 54b0d18

Browse files
author
Matt Sokoloff
committed
bug fixes and test updates
1 parent 7a639f7 commit 54b0d18

37 files changed

+644
-324
lines changed

labelbox/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
name = "labelbox"
22
__version__ = "2.7.0"
33

4+
from labelbox.schema.project import Project
45
from labelbox.client import Client
56
from labelbox.schema.bulk_import_request import BulkImportRequest
6-
from labelbox.schema.project import Project
77
from labelbox.schema.dataset import Dataset
88
from labelbox.schema.data_row import DataRow
99
from labelbox.schema.label import Label
@@ -16,6 +16,6 @@
1616
from labelbox.schema.asset_attachment import AssetAttachment
1717
from labelbox.schema.webhook import Webhook
1818
from labelbox.schema.prediction import Prediction, PredictionModel
19-
from labelbox.schema.ontology import Ontology
19+
from labelbox.schema.ontology import Ontology, OntologyBuilder, Classification, Option, Tool
2020
from labelbox.schema.role import Role, ProjectRole
2121
from labelbox.schema.invite import Invite, InviteLimit
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from .geometry import Line
2+
from .geometry import Point
3+
from .geometry import Mask
4+
from .geometry import Polygon
5+
from .geometry import Rectangle
6+
from .geometry import Geometry
7+
8+
from .annotation import ClassificationAnnotation
9+
from .annotation import VideoClassificationAnnotation
10+
from .annotation import ObjectAnnotation
11+
from .annotation import VideoObjectAnnotation
12+
13+
from .ner import TextEntity
14+
15+
from .classification import Checklist
16+
from .classification import ClassificationAnswer
17+
from .classification import Dropdown
18+
from .classification import Radio
19+
from .classification import Text
20+
21+
from .data import RasterData
22+
from .data import TextData
23+
from .data import VideoData
24+
25+
from .label import Label
26+
27+
from .collection import LabelList
28+
from .collection import LabelGenerator

labelbox/data/annotation_types/annotation.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import Any, Dict, List, Union
22

3+
from pydantic.main import BaseModel
4+
35
from .classification import Checklist, Dropdown, Radio, Text
46
from .feature import FeatureSchema
57
from .geometry import Geometry
@@ -9,22 +11,19 @@
911
class BaseAnnotation(FeatureSchema):
1012
""" Base annotation class. Shouldn't be directly instantiated
1113
"""
12-
classifications: List["ClassificationAnnotation"] = []
1314
extra: Dict[str, Any] = {}
1415

1516

16-
class ObjectAnnotation(BaseAnnotation):
17-
"""Class representing objects annotations (non classifications or annotations that have a location)
18-
"""
19-
value: Union[TextEntity, Geometry]
20-
21-
2217
class ClassificationAnnotation(BaseAnnotation):
23-
"""Class represneting classification annotations (annotations that don't have a location) """
18+
"""Class representing classification annotations (annotations that don't have a location) """
2419
value: Union[Text, Checklist, Radio, Dropdown]
2520

2621

27-
ClassificationAnnotation.update_forward_refs()
22+
class ObjectAnnotation(BaseAnnotation):
23+
"""Class representing objects annotations (non classifications or annotations that have a location)
24+
"""
25+
value: Union[TextEntity, Geometry]
26+
classifications: List[ClassificationAnnotation] = []
2827

2928

3029
class VideoObjectAnnotation(ObjectAnnotation):
@@ -47,7 +46,3 @@ class VideoClassificationAnnotation(ClassificationAnnotation):
4746
frame: The frame index that this annotation corresponds to
4847
"""
4948
frame: int
50-
51-
52-
VideoObjectAnnotation.update_forward_refs()
53-
ObjectAnnotation.update_forward_refs()

labelbox/data/annotation_types/collection.py

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,16 @@
55

66
from tqdm import tqdm
77

8-
from labelbox.schema.ontology import OntologyBuilder
8+
from labelbox.schema import ontology
99
from labelbox.orm.model import Entity
10+
from ..ontology import get_classifications, get_tools
1011
from ..generator import PrefetchGenerator
1112
from .label import Label
1213

1314
logger = logging.getLogger(__name__)
1415

1516

16-
class LabelCollection:
17+
class LabelList:
1718
"""
1819
A container for interacting with a collection of labels.
1920
Less memory efficient than LabelGenerator but more performant and convenient to use.
@@ -25,15 +26,15 @@ def __init__(self, data: Iterable[Label]):
2526
self._index = 0
2627

2728
def assign_schema_ids(
28-
self, ontology_builder: OntologyBuilder) -> "LabelCollection":
29+
self, ontology_builder: "ontology.OntologyBuilder") -> "LabelList":
2930
"""
3031
Adds schema ids to all FeatureSchema objects in the Labels.
3132
This is necessary for MAL.
3233
3334
Args:
34-
ontology_builder: The ontology that matches the feature names assigned to objects in this LabelCollection
35+
ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
3536
Returns:
36-
LabelCollection. useful for chaining these modifying functions
37+
LabelList. useful for chaining these modifying functions
3738
"""
3839
for label in self._data:
3940
label.assign_schema_ids(ontology_builder)
@@ -42,7 +43,7 @@ def assign_schema_ids(
4243
def add_to_dataset(self,
4344
dataset: "Entity.Dataset",
4445
signer: Callable[[bytes], str],
45-
max_concurrency=20) -> "LabelCollection":
46+
max_concurrency=20) -> "LabelList":
4647
"""
4748
Creates data rows from each labels data object and attaches the data to the given dataset.
4849
Updates the label's data object to have the same external_id and uid as the data row.
@@ -55,15 +56,15 @@ def add_to_dataset(self,
5556
dataset: labelbox dataset object to add the new data row to
5657
signer: A function that accepts bytes and returns a signed url.
5758
Returns:
58-
LabelCollection with updated references to new data rows
59+
LabelList with updated references to new data rows
5960
"""
6061
self._ensure_unique_external_ids()
6162
self.add_url_to_data(signer, max_concurrency=max_concurrency)
6263
upload_task = dataset.create_data_rows([{
63-
Entity.DataRow.row_data: label.data.url,
64-
Entity.DataRow.external_id: label.data.external_id
64+
'row_data': label.data.url,
65+
'external_id': label.data.external_id
6566
} for label in self._data])
66-
upload_task.wait_til_done()
67+
upload_task.wait_till_done()
6768

6869
data_row_lookup = {
6970
data_row.external_id: data_row.uid
@@ -73,9 +74,9 @@ def add_to_dataset(self,
7374
label.data.uid = data_row_lookup[label.data.external_id]
7475
return self
7576

76-
def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelCollection":
77+
def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelList":
7778
"""
78-
Creates signed urls for all masks in the LabelCollection.
79+
Creates signed urls for all masks in the LabelList.
7980
Multiple masks can reference the same RasterData mask so this makes sure we only upload that url once.
8081
Only uploads url if one doesn't already exist.
8182
@@ -84,15 +85,15 @@ def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelCollection":
8485
max_concurrency: how many threads to use for uploading.
8586
Should be balanced to match the signing services capabilities.
8687
Returns:
87-
LabelCollection with updated references to the new mask urls
88+
LabelList with updated references to the new mask urls
8889
"""
8990
for row in self._apply_threaded(
9091
[label.add_url_to_masks for label in self._data], max_concurrency,
9192
signer):
9293
...
9394
return self
9495

95-
def add_url_to_data(self, signer, max_concurrency=20) -> "LabelCollection":
96+
def add_url_to_data(self, signer, max_concurrency=20) -> "LabelList":
9697
"""
9798
Creates signed urls for the data
9899
Only uploads url if one doesn't already exist.
@@ -102,32 +103,46 @@ def add_url_to_data(self, signer, max_concurrency=20) -> "LabelCollection":
102103
max_concurrency: how many threads to use for uploading.
103104
Should be balanced to match the signing services capabilities.
104105
Returns:
105-
LabelCollection with updated references to the new data urls
106+
LabelList with updated references to the new data urls
106107
"""
107108
for row in self._apply_threaded(
108109
[label.add_url_to_data for label in self._data], max_concurrency,
109110
signer):
110111
...
111112
return self
112113

114+
def get_ontology(self) -> ontology.OntologyBuilder:
115+
classifications = []
116+
tools = []
117+
for label in self._data:
118+
tools = get_tools(label.object_annotations(), tools)
119+
classifications = get_classifications(
120+
label.classification_annotations(), classifications)
121+
return ontology.OntologyBuilder(tools=tools,
122+
classifications=classifications)
123+
113124
def _ensure_unique_external_ids(self) -> None:
114125
external_ids = set()
115126
for label in self._data:
116127
if label.data.external_id is None:
117-
label.data.external_id = uuid4()
128+
label.data.external_id = str(uuid4())
118129
else:
119130
if label.data.external_id in external_ids:
120131
raise ValueError(
121132
f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
122133
)
123134
external_ids.add(label.data.external_id)
124135

125-
def __iter__(self) -> "LabelCollection":
136+
def append(self, label: Label):
137+
self._data.append(label)
138+
139+
def __iter__(self) -> "LabelList":
126140
self._index = 0
127141
return self
128142

129143
def __next__(self) -> Label:
130144
if self._index == len(self._data):
145+
self._index = 0
131146
raise StopIteration
132147

133148
value = self._data[self._index]
@@ -154,18 +169,19 @@ class LabelGenerator(PrefetchGenerator):
154169
A container for interacting with a collection of labels.
155170
156171
Use this class if you have larger data. It is slightly harder to work with
157-
than the LabelCollection but will be much more memory efficient.
172+
than the LabelList but will be much more memory efficient.
158173
"""
159174

160175
def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
161176
self._fns = {}
162177
super().__init__(data, *args, **kwargs)
163178

164-
def as_collection(self) -> "LabelCollection":
165-
return LabelCollection(data=list(self))
179+
def as_list(self) -> "LabelList":
180+
return LabelList(data=list(self))
166181

167182
def assign_schema_ids(
168-
self, ontology_builder: OntologyBuilder) -> "LabelGenerator":
183+
self,
184+
ontology_builder: "ontology.OntologyBuilder") -> "LabelGenerator":
169185

170186
def _assign_ids(label: Label):
171187
label.assign_schema_ids(ontology_builder)
@@ -190,7 +206,7 @@ def _add_url_to_data(label: Label):
190206
label.add_url_to_data(signer)
191207
return label
192208

193-
self._fns['_add_url_to_data'] = _add_url_to_data
209+
self._fns['add_url_to_data'] = _add_url_to_data
194210
return self
195211

196212
def add_to_dataset(self, dataset: "Entity.Dataset",
@@ -199,7 +215,7 @@ def add_to_dataset(self, dataset: "Entity.Dataset",
199215
Creates data rows from each labels data object and attaches the data to the given dataset.
200216
Updates the label's data object to have the same external_id and uid as the data row.
201217
202-
This is a lot slower than LabelCollection.add_to_dataset but also more memory efficient.
218+
This is a lot slower than LabelList.add_to_dataset but also more memory efficient.
203219
204220
Args:
205221
dataset: labelbox dataset object to add the new data row to
@@ -237,6 +253,20 @@ def _add_url_to_masks(label: Label):
237253
self._fns['add_url_to_masks'] = _add_url_to_masks
238254
return self
239255

256+
def register_background_fn(self, fn: Callable[[Label], Label],
257+
name: str) -> "LabelGenerator":
258+
"""
259+
Allows users to add arbitrary io functions to the generator.
260+
These functions will be executed in parallel and added to a prefetch queue.
261+
262+
Args:
263+
fn: Callable that modifies a label and then returns the same label
264+
- For performance reasons, this function shouldn't run if the object already has the desired state.
265+
name: Register the name of the function. If the name already exists, then the function will be replaced.
266+
"""
267+
self._fns[name] = fn
268+
return self
269+
240270
def __iter__(self):
241271
return self
242272

@@ -255,4 +285,4 @@ def __next__(self):
255285
return self._process(value)
256286

257287

258-
LabelData = Union[LabelCollection, LabelGenerator]
288+
LabelCollection = Union[LabelList, LabelGenerator]

0 commit comments

Comments
 (0)