Skip to content

Commit b2952bd

Browse files
committed
Adding checks for missing data
1 parent b6247c9 commit b2952bd

File tree

8 files changed

+131
-89
lines changed

8 files changed

+131
-89
lines changed

ads/feature_store/statistics/charts/abstract_feature_stat.py

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
# Copyright (c) 2023 Oracle and/or its affiliates.
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55
from abc import abstractmethod
6+
from typing import Union
67

78
from ads.common.decorator.runtime_dependency import OptionalDependency
89

@@ -16,13 +17,24 @@
1617

1718

1819
class AbsFeatureStat:
20+
class ValidationFailedException(Exception):
    """Raised by a statistic's ``__validate__`` check when its data is unusable.

    No ``__init__`` override is defined, so the standard ``Exception``
    constructor applies: the class can be raised with no arguments or with
    an optional message that is kept in ``args`` and ``str(exc)``.
    """
23+
24+
def __init__(self):
25+
self.__validate__()
26+
27+
@abstractmethod
28+
def __validate__(self):
29+
pass
30+
1931
@abstractmethod
2032
def add_to_figure(self, fig: Figure, xaxis: int, yaxis: int):
2133
pass
2234

2335
@classmethod
2436
@abstractmethod
25-
def from_json(cls, json_dict: dict):
37+
def __from_json__(cls, json_dict: dict):
2638
pass
2739

2840
@staticmethod
@@ -33,3 +45,15 @@ def get_x_y_str_axes(xaxis: int, yaxis: int) -> ():
3345
("x" + str(xaxis + 1)),
3446
("y" + str(yaxis + 1)),
3547
)
48+
49+
@classmethod
def from_json(
    cls, json_dict: dict, ignore_errors: bool = False
) -> Union["AbsFeatureStat", None]:
    """Build an instance from ``json_dict`` by delegating to ``cls.__from_json__``.

    Args:
        json_dict: Parsed JSON payload passed through to ``__from_json__``.
        ignore_errors: When true, any ``Exception`` raised while building the
            instance is suppressed and ``None`` is returned instead.

    Returns:
        The constructed instance, or ``None`` if construction failed and
        ``ignore_errors`` is true.

    Raises:
        Exception: Whatever ``__from_json__`` raised, when ``ignore_errors``
            is false.
    """
    try:
        return cls.__from_json__(json_dict=json_dict)
    except Exception:
        # Deliberately broad: with ignore_errors=True the caller opts in to
        # treating any failed build as "no statistic available".
        if ignore_errors:
            return None
        # Bare raise propagates the active exception unchanged.
        raise

ads/feature_store/statistics/charts/box_plot.py

Lines changed: 38 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ class BoxPlot(AbsFeatureStat):
2828
CONST_MEAN = "Mean"
2929
CONST_BOX_PLOT_TITLE = "Box Plot"
3030
CONST_IQR = "IQR"
31-
CONST_FREQUENCY_DISTRIBUTION = "FrequencyDistribution"
31+
CONST_BOX_POINTS = "box_points"
3232

3333
class Quartiles:
3434
CONST_Q1 = "q1"
@@ -42,14 +42,11 @@ def __init__(self, q1: float, q2: float, q3: float):
4242

4343
@classmethod
4444
def from_json(cls, json_dict: dict) -> "BoxPlot.Quartiles":
45-
if json_dict is not None:
46-
return cls(
47-
json_dict.get(cls.CONST_Q1),
48-
json_dict.get(cls.CONST_Q2),
49-
json_dict.get(cls.CONST_Q3),
50-
)
51-
else:
52-
return None
45+
return cls(
46+
json_dict.get(cls.CONST_Q1),
47+
json_dict.get(cls.CONST_Q2),
48+
json_dict.get(cls.CONST_Q3),
49+
)
5350

5451
def __init__(
5552
self,
@@ -58,25 +55,28 @@ def __init__(
5855
sd: float,
5956
q1: float,
6057
q3: float,
61-
boxpoints: List[float],
58+
box_points: List[float],
6259
):
6360
self.mean = mean
6461
self.median = median
6562
self.q1 = q1
6663
self.q3 = q3
6764
self.sd = sd
6865
self.iqr = self.q3 - self.q1
69-
self.boxpoints = boxpoints
66+
self.box_points = box_points
67+
super().__init__()
7068

7169
def add_to_figure(self, fig: Figure, xaxis: int, yaxis: int):
7270
xaxis_str, yaxis_str, x_str, y_str = self.get_x_y_str_axes(xaxis, yaxis)
7371
fig.add_box(
72+
notched=False,
73+
boxmean=False,
7474
mean=[self.mean],
7575
median=[self.median],
7676
q1=[self.q1],
7777
q3=[self.q3],
7878
sd=[self.sd],
79-
y=[self.boxpoints],
79+
y=[self.box_points],
8080
upperfence=[self.q3 + 1.5 * self.iqr],
8181
lowerfence=[self.q1 - 1.5 * self.iqr],
8282
xaxis=x_str,
@@ -88,33 +88,33 @@ def add_to_figure(self, fig: Figure, xaxis: int, yaxis: int):
8888
fig.layout[yaxis_str]["title"] = "Values"
8989

9090
@staticmethod
91-
def get_boxpoints_from_frequency_distribution(
91+
def get_box_points_from_frequency_distribution(
9292
frequency_distribution: FrequencyDistribution,
9393
) -> List[float]:
94-
boxpoints = []
95-
if frequency_distribution is not None:
96-
for frequency, bin in zip(
97-
frequency_distribution.frequency, frequency_distribution.bins
98-
):
99-
boxpoints.extend([bin] * frequency)
100-
101-
return boxpoints
94+
# box_points = []
95+
if (
96+
frequency_distribution is not None
97+
and frequency_distribution.frequency is not None
98+
and frequency_distribution.bins is not None
99+
):
100+
return [
101+
bin_dist
102+
for frequency, bin_dist in zip(
103+
frequency_distribution.frequency, frequency_distribution.bins
104+
)
105+
if frequency > 0
106+
]
107+
else:
108+
return []
102109

103110
@classmethod
104-
def from_json(cls, json_dict: dict) -> "BoxPlot":
105-
if type(json_dict) is dict and json_dict.get(cls.CONST_QUARTILES) is not None:
106-
quartiles = cls.Quartiles.from_json(json_dict.get(cls.CONST_QUARTILES))
107-
return cls(
108-
mean=GenericFeatureValue.from_json(json_dict.get(cls.CONST_MEAN)).val,
109-
median=quartiles.q2,
110-
sd=GenericFeatureValue.from_json(json_dict.get(cls.CONST_SD)).val,
111-
q1=quartiles.q1,
112-
q3=quartiles.q3,
113-
boxpoints=cls.get_boxpoints_from_frequency_distribution(
114-
FrequencyDistribution.from_json(
115-
json_dict.get(cls.CONST_FREQUENCY_DISTRIBUTION)
116-
)
117-
),
118-
)
119-
else:
120-
return None
111+
def __from_json__(cls, json_dict: dict) -> "BoxPlot":
112+
quartiles = cls.Quartiles.from_json(json_dict.get(cls.CONST_QUARTILES))
113+
return cls(
114+
mean=GenericFeatureValue.from_json(json_dict.get(cls.CONST_MEAN)).val,
115+
median=quartiles.q2,
116+
sd=GenericFeatureValue.from_json(json_dict.get(cls.CONST_SD)).val,
117+
q1=quartiles.q1,
118+
q3=quartiles.q3,
119+
box_points=json_dict.get(cls.CONST_BOX_POINTS),
120+
)

ads/feature_store/statistics/charts/frequency_distribution.py

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -21,19 +21,25 @@ class FrequencyDistribution(AbsFeatureStat):
2121
CONST_BINS = "bins"
2222
CONST_FREQUENCY_DISTRIBUTION_TITLE = "Frequency Distribution"
2323

24+
def __validate__(self):
    """Check that ``frequency`` and ``bins`` are non-empty lists of equal length.

    Raises:
        ValidationFailedException: If either attribute is not a list, either
            is empty, or their lengths differ.
    """
    frequency, bins = self.frequency, self.bins
    # isinstance (rather than an exact type comparison) also accepts list
    # subclasses; the type checks must pass before len() is evaluated.
    is_valid = (
        isinstance(frequency, list)
        and isinstance(bins, list)
        and len(frequency) > 0
        and len(frequency) == len(bins)
    )
    if not is_valid:
        raise self.ValidationFailedException()
31+
2432
def __init__(self, frequency: List, bins: List):
2533
self.frequency = frequency
2634
self.bins = bins
35+
super().__init__()
2736

2837
@classmethod
29-
def from_json(cls, json_dict: dict) -> "FrequencyDistribution":
30-
if json_dict is not None:
31-
return FrequencyDistribution(
32-
frequency=json_dict.get(cls.CONST_FREQUENCY),
33-
bins=json_dict.get(cls.CONST_BINS),
34-
)
35-
else:
36-
return None
38+
def __from_json__(cls, json_dict: dict) -> "FrequencyDistribution":
39+
return FrequencyDistribution(
40+
frequency=json_dict.get(cls.CONST_FREQUENCY),
41+
bins=json_dict.get(cls.CONST_BINS),
42+
)
3743

3844
def add_to_figure(self, fig: Figure, xaxis: int, yaxis: int):
3945
xaxis_str, yaxis_str, x_str, y_str = self.get_x_y_str_axes(xaxis, yaxis)

ads/feature_store/statistics/charts/probability_distribution.py

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,23 +17,29 @@
1717

1818

1919
class ProbabilityDistribution(AbsFeatureStat):
20+
def __validate__(self):
    """Check that ``density`` and ``bins`` are non-empty lists of equal length.

    Raises:
        ValidationFailedException: If either attribute is not a list, either
            is empty, or their lengths differ.
    """
    density, bins = self.density, self.bins
    # isinstance (rather than an exact type comparison) also accepts list
    # subclasses; the type checks must pass before len() is evaluated.
    is_valid = (
        isinstance(density, list)
        and isinstance(bins, list)
        and len(density) > 0
        and len(density) == len(bins)
    )
    if not is_valid:
        raise self.ValidationFailedException()
27+
2028
CONST_DENSITY = "density"
2129
CONST_BINS = "bins"
2230
CONST_PROBABILITY_DISTRIBUTION_TITLE = "Probability Distribution"
2331

2432
def __init__(self, density: List, bins: List):
2533
self.density = density
2634
self.bins = bins
35+
super().__init__()
2736

2837
@classmethod
29-
def from_json(cls, json_dict: dict):
30-
if json_dict is not None:
31-
return cls(
32-
density=json_dict.get(ProbabilityDistribution.CONST_DENSITY),
33-
bins=json_dict.get(ProbabilityDistribution.CONST_BINS),
34-
)
35-
else:
36-
return None
38+
def __from_json__(cls, json_dict: dict) -> "ProbabilityDistribution":
39+
return cls(
40+
density=json_dict.get(ProbabilityDistribution.CONST_DENSITY),
41+
bins=json_dict.get(ProbabilityDistribution.CONST_BINS),
42+
)
3743

3844
def add_to_figure(self, fig: Figure, xaxis: int, yaxis: int):
3945
xaxis_str, yaxis_str, x_str, y_str = self.get_x_y_str_axes(xaxis, yaxis)

ads/feature_store/statistics/charts/top_k_frequent_elements.py

Lines changed: 14 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
# Copyright (c) 2023 Oracle and/or its affiliates.
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55
from typing import List
6-
76
from ads.common.decorator.runtime_dependency import OptionalDependency
87

98
from ads.feature_store.statistics.charts.abstract_feature_stat import AbsFeatureStat
@@ -18,6 +17,10 @@
1817

1918

2019
class TopKFrequentElements(AbsFeatureStat):
20+
def __validate__(self):
    """Check that ``elements`` is a non-empty list.

    Raises:
        ValidationFailedException: If ``elements`` is not a list or is empty.
    """
    elements = self.elements
    # isinstance also accepts list subclasses; an empty list is falsy.
    if not (isinstance(elements, list) and elements):
        raise self.ValidationFailedException()
23+
2124
CONST_VALUE = "value"
2225
CONST_TOP_K_FREQUENT_TITLE = "Top K Frequent Elements"
2326

@@ -39,28 +42,21 @@ def __init__(
3942
def from_json(
4043
cls, json_dict: dict
4144
) -> "TopKFrequentElements.TopKFrequentElement":
42-
if json_dict is not None:
43-
return cls(
44-
value=json_dict.get(cls.CONST_VALUE),
45-
estimate=json_dict.get(cls.CONST_ESTIMATE),
46-
lower_bound=json_dict.get(cls.CONST_LOWER_BOUND),
47-
upper_bound=json_dict.get(cls.CONST_UPPER_BOUND),
48-
)
49-
else:
50-
return None
45+
return cls(
46+
value=json_dict.get(cls.CONST_VALUE),
47+
estimate=json_dict.get(cls.CONST_ESTIMATE),
48+
lower_bound=json_dict.get(cls.CONST_LOWER_BOUND),
49+
upper_bound=json_dict.get(cls.CONST_UPPER_BOUND),
50+
)
5151

5252
def __init__(self, elements: List[TopKFrequentElement]):
5353
self.elements = elements
54+
super().__init__()
5455

5556
@classmethod
56-
def from_json(cls, json_dict: dict) -> "TopKFrequentElements":
57-
if json_dict is not None and json_dict.get(cls.CONST_VALUE) is not None:
58-
elements = json_dict.get(cls.CONST_VALUE)
59-
return cls(
60-
[cls.TopKFrequentElement.from_json(element) for element in elements]
61-
)
62-
else:
63-
return None
57+
def __from_json__(cls, json_dict: dict) -> "TopKFrequentElements":
58+
elements = json_dict.get(cls.CONST_VALUE)
59+
return cls([cls.TopKFrequentElement.from_json(element) for element in elements])
6460

6561
def add_to_figure(self, fig: Figure, xaxis: int, yaxis: int):
6662
xaxis_str, yaxis_str, x_str, y_str = self.get_x_y_str_axes(xaxis, yaxis)

ads/feature_store/statistics/feature_stat.py

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -53,16 +53,27 @@ def __init__(
5353
@classmethod
5454
def from_json(cls, feature_name: str, json_dict: dict) -> "FeatureStatistics":
5555
if json_dict is not None:
56+
frequency_distribution = FrequencyDistribution.from_json(
57+
json_dict.get(cls.CONST_FREQUENCY_DISTRIBUTION), ignore_errors=True
58+
)
59+
60+
# inject box points for boxplot creation
61+
json_dict[
62+
BoxPlot.CONST_BOX_POINTS
63+
] = BoxPlot.get_box_points_from_frequency_distribution(
64+
frequency_distribution
65+
)
5666
return cls(
5767
feature_name,
58-
TopKFrequentElements.from_json(json_dict.get(cls.CONST_TOP_K_FREQUENT)),
59-
FrequencyDistribution.from_json(
60-
json_dict.get(cls.CONST_FREQUENCY_DISTRIBUTION)
68+
TopKFrequentElements.from_json(
69+
json_dict.get(cls.CONST_TOP_K_FREQUENT), ignore_errors=True
6170
),
71+
frequency_distribution,
6272
ProbabilityDistribution.from_json(
63-
json_dict.get(cls.CONST_PROBABILITY_DISTRIBUTION)
73+
json_dict.get(cls.CONST_PROBABILITY_DISTRIBUTION),
74+
ignore_errors=True,
6475
),
65-
BoxPlot.from_json(json_dict),
76+
BoxPlot.from_json(json_dict, ignore_errors=True),
6677
)
6778
else:
6879
return cls(feature_name)
@@ -72,10 +83,10 @@ def __feature_stat_objects__(self) -> List[AbsFeatureStat]:
7283
return [
7384
stat
7485
for stat in [
86+
self.box_plot,
7587
self.top_k_frequent_elements,
7688
self.frequency_distribution,
7789
self.probability_distribution,
78-
self.box_plot,
7990
]
8091
if stat is not None
8192
]
@@ -87,8 +98,7 @@ def next_graph_position_generator():
8798
yield 0
8899
yield 2
89100

90-
graph_count = len(self.__feature_stat_objects__)
91-
if graph_count > 0:
101+
if len(self.__feature_stat_objects__) > 0:
92102
fig = make_subplots(cols=3, column_titles=[" "] * 3)
93103
for idx, stat in zip(
94104
next_graph_position_generator(),
@@ -104,3 +114,7 @@ def next_graph_position_generator():
104114
fig,
105115
filename=self.CONST_PLOT_FORMAT.format(self.feature_name),
106116
)
117+
else:
118+
print(
119+
f"No statistical information for feature {self.feature_name} can be visualised"
120+
)

ads/feature_store/statistics/generic_feature_value.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,6 @@ def __init__(self, val: any):
1212

1313
@classmethod
1414
def from_json(cls, json_dict: dict) -> "GenericFeatureValue":
15-
if json_dict is not None:
16-
return GenericFeatureValue(
17-
val=json_dict.get(cls.CONST_VALUE),
18-
)
19-
else:
20-
return None
15+
return GenericFeatureValue(
16+
val=json_dict.get(cls.CONST_VALUE),
17+
)

ads/feature_store/statistics/statistics.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,8 @@ def kind(self) -> str:
2828

2929
def to_viz(self, feature_list: List[str] = None):
3030
if self.content is not None:
31-
stats: dict = json.loads(self.content)
3231
[
3332
FeatureStatistics.from_json(feature, stat).to_viz()
34-
for feature, stat in stats.items()
33+
for feature, stat in json.loads(self.content).items()
3534
if (feature_list is None or feature in feature_list)
3635
]

0 commit comments

Comments
 (0)