Skip to content

Commit f95e611

Browse files
authored
[PLT-1993] Fixed relationship label bug (#1918)
1 parent fbd3b33 commit f95e611

File tree

5 files changed

+430
-368
lines changed

5 files changed

+430
-368
lines changed

libs/labelbox/src/labelbox/data/serialization/ndjson/converter.py

Lines changed: 1 addition & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,9 @@
1-
import copy
21
import logging
3-
import uuid
4-
from collections import defaultdict, deque
5-
from typing import Any, Deque, Dict, Generator, List, Set, Union
2+
from typing import Any, Dict, Generator
63

7-
from labelbox.data.annotation_types.annotation import ObjectAnnotation
8-
from labelbox.data.annotation_types.classification.classification import (
9-
ClassificationAnnotation,
10-
)
11-
from labelbox.data.annotation_types.metrics.confusion_matrix import (
12-
ConfusionMatrixMetric,
13-
)
14-
from labelbox.data.annotation_types.metrics.scalar import ScalarMetric
15-
from labelbox.data.annotation_types.video import VideoMaskAnnotation
164

175
from ...annotation_types.collection import LabelCollection
18-
from ...annotation_types.relationship import RelationshipAnnotation
19-
from ...annotation_types.mmc import MessageEvaluationTaskAnnotation
206
from .label import NDLabel
21-
import copy
227

238
logger = logging.getLogger(__name__)
249

@@ -42,67 +27,8 @@ def serialize(
4227
Returns:
4328
A generator for accessing the ndjson representation of the data
4429
"""
45-
used_uuids: Set[uuid.UUID] = set()
4630

47-
relationship_uuids: Dict[uuid.UUID, Deque[uuid.UUID]] = defaultdict(
48-
deque
49-
)
50-
51-
# UUIDs are private properties used to enhance UX when defining relationships.
52-
# They are created for all annotations, but only utilized for relationships.
53-
# To avoid overwriting, UUIDs must be unique across labels.
54-
# Non-relationship annotation UUIDs are regenerated when they are reused.
55-
# For relationship annotations, during first pass, we update the UUIDs of the source and target annotations.
56-
# During the second pass, we update the UUIDs of the annotations referenced by the relationship annotations.
5731
for label in labels:
58-
uuid_safe_annotations: List[
59-
Union[
60-
ClassificationAnnotation,
61-
ObjectAnnotation,
62-
VideoMaskAnnotation,
63-
ScalarMetric,
64-
ConfusionMatrixMetric,
65-
RelationshipAnnotation,
66-
MessageEvaluationTaskAnnotation,
67-
]
68-
] = []
69-
# First pass to get all RelationshipAnnotations
70-
# and update the UUIDs of the source and target annotations
71-
for annotation in label.annotations:
72-
if isinstance(annotation, RelationshipAnnotation):
73-
annotation = copy.deepcopy(annotation)
74-
new_source_uuid = uuid.uuid4()
75-
new_target_uuid = uuid.uuid4()
76-
relationship_uuids[annotation.value.source._uuid].append(
77-
new_source_uuid
78-
)
79-
relationship_uuids[annotation.value.target._uuid].append(
80-
new_target_uuid
81-
)
82-
annotation.value.source._uuid = new_source_uuid
83-
annotation.value.target._uuid = new_target_uuid
84-
if annotation._uuid in used_uuids:
85-
annotation._uuid = uuid.uuid4()
86-
used_uuids.add(annotation._uuid)
87-
uuid_safe_annotations.append(annotation)
88-
# Second pass to update UUIDs for annotations referenced by RelationshipAnnotations
89-
for annotation in label.annotations:
90-
if not isinstance(
91-
annotation, RelationshipAnnotation
92-
) and hasattr(annotation, "_uuid"):
93-
annotation = copy.deepcopy(annotation)
94-
next_uuids = relationship_uuids[annotation._uuid]
95-
if len(next_uuids) > 0:
96-
annotation._uuid = next_uuids.popleft()
97-
98-
if annotation._uuid in used_uuids:
99-
annotation._uuid = uuid.uuid4()
100-
used_uuids.add(annotation._uuid)
101-
uuid_safe_annotations.append(annotation)
102-
else:
103-
if not isinstance(annotation, RelationshipAnnotation):
104-
uuid_safe_annotations.append(annotation)
105-
label.annotations = uuid_safe_annotations
10632
for example in NDLabel.from_common([label]):
10733
annotation_uuid = getattr(example, "uuid", None)
10834
res = example.model_dump(

libs/labelbox/src/labelbox/data/serialization/ndjson/label.py

Lines changed: 32 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
from collections import defaultdict
2+
import copy
23
from itertools import groupby
34
from operator import itemgetter
4-
from typing import Dict, Generator, List, Tuple, Union
5+
from typing import Generator, List, Tuple, Union
6+
from uuid import uuid4
57

68
from pydantic import BaseModel
79

810
from ...annotation_types.annotation import (
911
ClassificationAnnotation,
1012
ObjectAnnotation,
1113
)
12-
from ...annotation_types.collection import LabelCollection, LabelGenerator
13-
from ...annotation_types.data.generic_data_row_data import GenericDataRowData
14+
from ...annotation_types.collection import LabelCollection
1415
from ...annotation_types.label import Label
1516
from ...annotation_types.llm_prompt_response.prompt import (
1617
PromptClassificationAnnotation,
@@ -23,7 +24,6 @@
2324
VideoMaskAnnotation,
2425
VideoObjectAnnotation,
2526
)
26-
from .base import DataRow
2727
from .classification import (
2828
NDChecklistSubclass,
2929
NDClassification,
@@ -60,143 +60,19 @@
6060
class NDLabel(BaseModel):
6161
annotations: AnnotationType
6262

63-
class _Relationship(BaseModel):
64-
"""This object holds information about the relationship"""
65-
66-
ndjson: NDRelationship
67-
source: str
68-
target: str
69-
70-
class _AnnotationGroup(BaseModel):
71-
"""Stores all the annotations and relationships per datarow"""
72-
73-
data_row: DataRow = None
74-
ndjson_annotations: Dict[str, AnnotationType] = {}
75-
relationships: List["NDLabel._Relationship"] = []
76-
77-
def to_common(self) -> LabelGenerator:
78-
annotation_groups = defaultdict(NDLabel._AnnotationGroup)
79-
80-
for ndjson_annotation in self.annotations:
81-
key = (
82-
ndjson_annotation.data_row.id
83-
or ndjson_annotation.data_row.global_key
84-
)
85-
group = annotation_groups[key]
86-
87-
if isinstance(ndjson_annotation, NDRelationship):
88-
group.relationships.append(
89-
NDLabel._Relationship(
90-
ndjson=ndjson_annotation,
91-
source=ndjson_annotation.relationship.source,
92-
target=ndjson_annotation.relationship.target,
93-
)
94-
)
95-
else:
96-
# if this is the first object in this group, we
97-
# take note of the DataRow this group belongs to
98-
# and store it in the _AnnotationGroup
99-
if not group.ndjson_annotations:
100-
group.data_row = ndjson_annotation.data_row
101-
102-
# if this assertion fails and it's a valid case,
103-
# we need to change the value type of
104-
# `_AnnotationGroup.ndjson_annotations` to accept a list of objects
105-
# and adapt the code to support duplicate UUIDs
106-
assert (
107-
ndjson_annotation.uuid not in group.ndjson_annotations
108-
), f"UUID '{ndjson_annotation.uuid}' is not unique"
109-
110-
group.ndjson_annotations[ndjson_annotation.uuid] = (
111-
ndjson_annotation
112-
)
113-
114-
return LabelGenerator(
115-
data=self._generate_annotations(annotation_groups)
116-
)
117-
11863
@classmethod
11964
def from_common(
12065
cls, data: LabelCollection
12166
) -> Generator["NDLabel", None, None]:
12267
for label in data:
68+
if all(
69+
isinstance(model, RelationshipAnnotation)
70+
for model in label.annotations
71+
):
72+
yield from cls._create_relationship_annotations(label)
12373
yield from cls._create_non_video_annotations(label)
12474
yield from cls._create_video_annotations(label)
12575

126-
def _generate_annotations(
127-
self, annotation_groups: Dict[str, _AnnotationGroup]
128-
) -> Generator[Label, None, None]:
129-
for _, group in annotation_groups.items():
130-
relationship_annotations: Dict[str, ObjectAnnotation] = {}
131-
annotations = []
132-
# first, we iterate through all the NDJSON objects and store the
133-
# deserialized objects in the _AnnotationGroupTuple
134-
# object *if* the object can be used in a relationship
135-
for uuid, ndjson_annotation in group.ndjson_annotations.items():
136-
if isinstance(ndjson_annotation, NDSegments):
137-
annotations.extend(
138-
NDSegments.to_common(
139-
ndjson_annotation,
140-
ndjson_annotation.name,
141-
ndjson_annotation.schema_id,
142-
)
143-
)
144-
elif isinstance(ndjson_annotation, NDVideoMasks):
145-
annotations.append(
146-
NDVideoMasks.to_common(ndjson_annotation)
147-
)
148-
elif isinstance(ndjson_annotation, NDObjectType.__args__):
149-
annotation = NDObject.to_common(ndjson_annotation)
150-
annotations.append(annotation)
151-
relationship_annotations[uuid] = annotation
152-
elif isinstance(
153-
ndjson_annotation, NDClassificationType.__args__
154-
):
155-
annotations.extend(
156-
NDClassification.to_common(ndjson_annotation)
157-
)
158-
elif isinstance(
159-
ndjson_annotation, (NDScalarMetric, NDConfusionMatrixMetric)
160-
):
161-
annotations.append(
162-
NDMetricAnnotation.to_common(ndjson_annotation)
163-
)
164-
elif isinstance(ndjson_annotation, NDPromptClassificationType):
165-
annotation = NDPromptClassification.to_common(
166-
ndjson_annotation
167-
)
168-
annotations.append(annotation)
169-
elif isinstance(ndjson_annotation, NDMessageTask):
170-
annotations.append(ndjson_annotation.to_common())
171-
else:
172-
raise TypeError(
173-
f"Unsupported annotation. {type(ndjson_annotation)}"
174-
)
175-
176-
# after all the annotations have been discovered, we can now create
177-
# the relationship objects and use references to the objects
178-
# involved
179-
for relationship in group.relationships:
180-
try:
181-
source, target = (
182-
relationship_annotations[relationship.source],
183-
relationship_annotations[relationship.target],
184-
)
185-
except KeyError:
186-
raise ValueError(
187-
f"Relationship object refers to nonexistent object with UUID '{relationship.source}' and/or '{relationship.target}'"
188-
)
189-
annotations.append(
190-
NDRelationship.to_common(
191-
relationship.ndjson, source, target
192-
)
193-
)
194-
195-
yield Label(
196-
annotations=annotations,
197-
data=GenericDataRowData,
198-
)
199-
20076
@staticmethod
20177
def _get_consecutive_frames(
20278
frames_indices: List[int],
@@ -317,3 +193,26 @@ def _create_non_video_annotations(cls, label: Label):
317193
raise TypeError(
318194
f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value',annotation))}`"
319195
)
196+
197+
def _create_relationship_annotations(cls, label: Label):
198+
relationship_annotations = [
199+
annotation
200+
for annotation in label.annotations
201+
if isinstance(annotation, RelationshipAnnotation)
202+
]
203+
for relationship_annotation in relationship_annotations:
204+
uuid1 = uuid4()
205+
uuid2 = uuid4()
206+
source = copy.copy(relationship_annotation.value.source)
207+
target = copy.copy(relationship_annotation.value.target)
208+
if not isinstance(source, ObjectAnnotation) or not isinstance(
209+
target, ObjectAnnotation
210+
):
211+
raise TypeError(
212+
f"Unable to create relationship with non ObjectAnnotations. `Source: {type(source)} Target: {type(target)}`"
213+
)
214+
if not source._uuid:
215+
source._uuid = uuid1
216+
if not target._uuid:
217+
target._uuid = uuid2
218+
yield relationship_annotation

libs/labelbox/tests/data/annotation_import/test_mea_prediction_import.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,6 @@ def test_create_from_label_objects(
218218
annotations=[
219219
ObjectAnnotation(
220220
name="polygon",
221-
extra={
222-
"uuid": "6d10fa30-3ea0-4e6c-bbb1-63f5c29fe3e4",
223-
},
224221
value=Polygon(
225222
points=[
226223
Point(x=147.692, y=118.154),
@@ -233,19 +230,13 @@ def test_create_from_label_objects(
233230
),
234231
ObjectAnnotation(
235232
name="bbox",
236-
extra={
237-
"uuid": "15b7138f-4bbc-42c5-ae79-45d87b0a3b2a",
238-
},
239233
value=Rectangle(
240234
start=Point(x=58.0, y=48.0),
241235
end=Point(x=70.0, y=113.0),
242236
),
243237
),
244238
ObjectAnnotation(
245239
name="polyline",
246-
extra={
247-
"uuid": "cf4c6df9-c39c-4fbc-9541-470f6622978a",
248-
},
249240
value=Line(
250241
points=[
251242
Point(x=147.692, y=118.154),

0 commit comments

Comments
 (0)