Skip to content

Commit c9d8e4b

Browse files
author
Matt Sokoloff
committed
add missing file
1 parent ab49cc8 commit c9d8e4b

File tree

1 file changed

+175
-0
lines changed

1 file changed

+175
-0
lines changed

labelbox/data/metrics/group.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
"""
2+
Tools for grouping features and labels so that we can compute metrics on the individual groups
3+
"""
4+
from collections import defaultdict
5+
from typing import Dict, List, Tuple, Union
6+
try:
7+
from typing import Literal
8+
except ImportError:
9+
from typing_extensions import Literal
10+
11+
from ..annotation_types.feature import FeatureSchema
12+
from ..annotation_types import ObjectAnnotation, Label, LabelList
13+
14+
15+
def get_identifying_key(
    features_a: List[FeatureSchema], features_b: List[FeatureSchema]
) -> Union[Literal['name'], Literal['feature_schema_id']]:
    """
    Picks the attribute that can be used to match features across both lists.

    A key is usable only when every feature in both lists has it set.
    `name` is tried first and `feature_schema_id` is the fallback.

    Args:
        features_a : List of FeatureSchemas (usually ObjectAnnotations or ClassificationAnnotations)
        features_b : List of FeatureSchemas (usually ObjectAnnotations or ClassificationAnnotations)
    Returns:
        'name' or 'feature_schema_id', whichever is set on every feature of both lists.
    Raises:
        ValueError: If features_a has neither key set on all of its features,
            or if no single key is set on every feature of both lists.
    """
    ids_complete_a, names_complete_a = all_have_key(features_a)
    if not (ids_complete_a or names_complete_a):
        raise ValueError("All data must have feature_schema_ids or names set")

    ids_complete_b, names_complete_b = all_have_key(features_b)

    # Names win when both keys are usable because they are readable by the user.
    if names_complete_a and names_complete_b:
        return 'name'
    if ids_complete_a and ids_complete_b:
        return 'feature_schema_id'
    raise ValueError(
        "Ground truth and prediction annotations must have set all name or feature ids. "
        "Otherwise there is no key to match on. Please update.")
46+
47+
48+
def all_have_key(features: List[FeatureSchema]) -> Tuple[bool, bool]:
    """
    Reports whether every feature has a feature_schema_id and whether every feature has a name.

    Args:
        features (List[FeatureSchema]) : The features to inspect.
    Returns:
        A tuple `(all_schema_ids_set, all_names_set)`. Note the order: the schema id
        flag comes first, the name flag second. An empty list yields `(True, True)`.
    """
    name_missing = False
    schema_id_missing = False
    for item in features:
        # Both attributes are read for every item; a single None flips the flag for good.
        name_missing |= item.name is None
        schema_id_missing |= item.feature_schema_id is None
    return not schema_id_missing, not name_missing
64+
65+
66+
def get_label_pairs(labels_a: LabelList,
                    labels_b: LabelList,
                    match_on="uid",
                    filter_mismatch=False) -> Dict[str, Tuple[Label, Label]]:
    """
    Pairs up labels from two collections (e.g. predictions and ground truths)
    by a key stored on each label's data row.

    The pairing assumes that the data row `uid` or `external_id` has been provided
    by the user. These fields are not required and can be empty; when that is the
    case this function raises and the user has to supply their own matching strategy.
    If several labels in the same collection share a key, only the last one is kept.

    Args:
        labels_a (LabelList): A collection of labels to match with labels_b
        labels_b (LabelList): A collection of labels to match with labels_a
        match_on ('uid' or 'external_id'): The data row key to match labels by.
        filter_mismatch (bool): If True, keys that appear in only one collection are
            dropped from the result. If False, such a key raises a ValueError.

    Returns:
        A dict keyed by uid or external id. Each value is a two-element list
        `[label_from_a, label_from_b]`.

    Raises:
        ValueError: If `match_on` is not 'uid' or 'external_id', if any label's data
            row lacks the requested key, or if a key is missing from one collection
            while `filter_mismatch` is False.
    """
    if match_on not in ('uid', 'external_id'):
        raise ValueError("Can only match on `uid` or `external_id`.")

    label_lookup_a = {
        getattr(label.data, match_on, None): label for label in labels_a
    }
    label_lookup_b = {
        getattr(label.data, match_on, None): label for label in labels_b
    }
    # A None key means at least one data row has no value for `match_on`.
    if None in label_lookup_a or None in label_lookup_b:
        raise ValueError(
            f"One or more of the labels has a data row without the required key {match_on}."
            " It cannot be determined which labels match without this information."
            f" Either assign {match_on} to each Label or create your own pairing function."
        )

    pairs = defaultdict(list)
    for key in set(label_lookup_a).union(label_lookup_b):
        if key not in label_lookup_a or key not in label_lookup_b:
            if not filter_mismatch:
                raise ValueError(
                    f"{match_on} {key} is not available in both LabelLists. "
                    "Set `filter_mismatch = True` to filter out these examples, assign the ids manually, or create your own matching function."
                )
            continue
        pairs[key].extend([label_lookup_a[key], label_lookup_b[key]])
    return pairs
117+
118+
119+
def get_feature_pairs(
    features_a: List[FeatureSchema], features_b: List[FeatureSchema]
) -> Dict[str, Tuple[List[FeatureSchema], List[FeatureSchema]]]:
    """
    Groups the features of both lists by their shared identifying key and pairs the groups.

    The identifying key (name or feature schema id) is chosen by `get_identifying_key`.

    Args:
        features_a (List[FeatureSchema]): A list of features to match with features_b
        features_b (List[FeatureSchema]): A list of features to match with features_a
    Returns:
        A dict keyed by the identifying value found on the features. Each value is a
        two-element list: the features from features_a with that key, then the
        features from features_b with that key. A side with no such features
        contributes an empty list.
    """
    match_key = get_identifying_key(features_a, features_b)
    grouped_a = _create_feature_lookup(features_a, match_key)
    grouped_b = _create_feature_lookup(features_b, match_key)

    paired = defaultdict(list)
    for group_id in set(grouped_a) | set(grouped_b):
        # The lookups are defaultdicts, so indexing an absent group yields [].
        paired[group_id].extend([grouped_a[group_id], grouped_b[group_id]])
    return paired
143+
144+
145+
def _create_feature_lookup(features: List[FeatureSchema],
                           key: str) -> Dict[str, List[FeatureSchema]]:
    """
    Buckets features by the value of one of their attributes.

    Args:
        features: List of features to group
        key: Name of the attribute to group by (e.g. 'name' or 'feature_schema_id')
    Returns:
        a defaultdict where each key is a value of that attribute and the value
        is the list of features carrying it, in their original order.
        Looking up an absent key yields an empty list.
    """
    buckets = defaultdict(list)
    for item in features:
        bucket_id = getattr(item, key)
        buckets[bucket_id].append(item)
    return buckets
160+
161+
162+
def has_no_matching_annotations(ground_truths: List[ObjectAnnotation],
                                predictions: List[ObjectAnnotation]):
    """
    Reports whether exactly one of the two annotation lists is empty.

    Args:
        ground_truths: Ground truth annotations
        predictions: Predicted annotations
    Returns:
        True when one list has items and the other has none, so nothing can match.
        False when both are empty or both are non-empty.
    """
    ground_truths_empty = len(ground_truths) == 0
    predictions_empty = len(predictions) == 0
    # Annotations on one side only means there is nothing to pair them with.
    return ground_truths_empty != predictions_empty
171+
172+
173+
def has_no_annotations(ground_truths: List[ObjectAnnotation],
                       predictions: List[ObjectAnnotation]):
    """
    Reports whether both annotation lists are empty.

    Args:
        ground_truths: Ground truth annotations
        predictions: Predicted annotations
    Returns:
        True only when neither list contains any annotation.
    """
    return not (len(ground_truths) or len(predictions))

0 commit comments

Comments
 (0)