Skip to content

Commit b6247c9

Browse files
committed
adding boxplot, fixing alignment issues, and supporting per-feature statistic visualisation
1 parent 3663473 commit b6247c9

File tree

5 files changed

+50
-20
lines changed

5 files changed

+50
-20
lines changed

ads/feature_store/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from ads.feature_store.feature_group_expectation import Expectation
3939
from ads.feature_store.feature_option_details import FeatureOptionDetails
4040
from ads.feature_store.service.oci_dataset import OCIDataset
41-
from ads.feature_store.statistics import Statistics
41+
from ads.feature_store.statistics.statistics import Statistics
4242
from ads.feature_store.statistics_config import StatisticsConfig
4343
from ads.feature_store.service.oci_lineage import OCILineage
4444
from ads.feature_store.model_details import ModelDetails

ads/feature_store/feature_group.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from ads.feature_store.service.oci_feature_group import OCIFeatureGroup
4545
from ads.feature_store.service.oci_feature_group_job import OCIFeatureGroupJob
4646
from ads.feature_store.service.oci_lineage import OCILineage
47-
from ads.feature_store.statistics import Statistics
47+
from ads.feature_store.statistics.statistics import Statistics
4848
from ads.feature_store.statistics_config import StatisticsConfig
4949
from ads.feature_store.validation_output import ValidationOutput
5050

ads/feature_store/statistics/charts/box_plot.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22
# -*- coding: utf-8; -*-
33
# Copyright (c) 2023 Oracle and/or its affiliates.
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
5+
from typing import List
56

67
from ads.common.decorator.runtime_dependency import OptionalDependency
78
from ads.feature_store.statistics.charts.abstract_feature_stat import AbsFeatureStat
9+
from ads.feature_store.statistics.charts.frequency_distribution import (
10+
FrequencyDistribution,
11+
)
812
from ads.feature_store.statistics.generic_feature_value import GenericFeatureValue
913

1014
try:
@@ -23,6 +27,8 @@ class BoxPlot(AbsFeatureStat):
2327
CONST_SD = "StandardDeviation"
2428
CONST_MEAN = "Mean"
2529
CONST_BOX_PLOT_TITLE = "Box Plot"
30+
CONST_IQR = "IQR"
31+
CONST_FREQUENCY_DISTRIBUTION = "FrequencyDistribution"
2632

2733
class Quartiles:
2834
CONST_Q1 = "q1"
@@ -52,16 +58,15 @@ def __init__(
5258
sd: float,
5359
q1: float,
5460
q3: float,
55-
min: float,
56-
max: float,
61+
boxpoints: List[float],
5762
):
5863
self.mean = mean
5964
self.median = median
6065
self.q1 = q1
6166
self.q3 = q3
6267
self.sd = sd
63-
self.min = min
64-
self.max = max
68+
self.iqr = self.q3 - self.q1
69+
self.boxpoints = boxpoints
6570

6671
def add_to_figure(self, fig: Figure, xaxis: int, yaxis: int):
6772
xaxis_str, yaxis_str, x_str, y_str = self.get_x_y_str_axes(xaxis, yaxis)
@@ -71,14 +76,30 @@ def add_to_figure(self, fig: Figure, xaxis: int, yaxis: int):
7176
q1=[self.q1],
7277
q3=[self.q3],
7378
sd=[self.sd],
74-
upperfence=[self.max],
75-
lowerfence=[self.min],
79+
y=[self.boxpoints],
80+
upperfence=[self.q3 + 1.5 * self.iqr],
81+
lowerfence=[self.q1 - 1.5 * self.iqr],
7682
xaxis=x_str,
7783
yaxis=y_str,
84+
name="",
85+
jitter=0,
7886
)
7987
fig.layout.annotations[xaxis].text = self.CONST_BOX_PLOT_TITLE
8088
fig.layout[yaxis_str]["title"] = "Values"
8189

90+
@staticmethod
def get_boxpoints_from_frequency_distribution(
    frequency_distribution: "FrequencyDistribution",
) -> List[float]:
    """Expand a binned frequency distribution into a flat list of points.

    Each bin value is repeated as many times as its paired frequency, in
    bin order.

    Args:
        frequency_distribution: Object exposing parallel ``frequency`` and
            ``bins`` sequences, or ``None``.

    Returns:
        The repeated bin values; an empty list when
        ``frequency_distribution`` is ``None``.
    """
    if frequency_distribution is None:
        return []

    boxpoints: List[float] = []
    # zip() stops at the shorter sequence, so unpaired trailing entries
    # in either list are ignored.
    for count, bin_value in zip(
        frequency_distribution.frequency, frequency_distribution.bins
    ):
        # List repetition only accepts integers; counts may arrive as
        # whole-number floats (e.g. 3.0), so coerce before repeating.
        boxpoints.extend([bin_value] * int(count))
    return boxpoints
102+
82103
@classmethod
83104
def from_json(cls, json_dict: dict) -> "BoxPlot":
84105
if type(json_dict) is dict and json_dict.get(cls.CONST_QUARTILES) is not None:
@@ -89,8 +110,11 @@ def from_json(cls, json_dict: dict) -> "BoxPlot":
89110
sd=GenericFeatureValue.from_json(json_dict.get(cls.CONST_SD)).val,
90111
q1=quartiles.q1,
91112
q3=quartiles.q3,
92-
min=GenericFeatureValue.from_json(json_dict.get(cls.CONST_MIN)).val,
93-
max=GenericFeatureValue.from_json(json_dict.get(cls.CONST_MAX)).val,
113+
boxpoints=cls.get_boxpoints_from_frequency_distribution(
114+
FrequencyDistribution.from_json(
115+
json_dict.get(cls.CONST_FREQUENCY_DISTRIBUTION)
116+
)
117+
),
94118
)
95119
else:
96120
return None

ads/feature_store/statistics/feature_stat.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ class FeatureStatistics:
3939
def __init__(
4040
self,
4141
feature_name: str,
42-
top_k_frequent_elements: TopKFrequentElements,
43-
frequency_distribution: FrequencyDistribution,
44-
probability_distribution: ProbabilityDistribution,
45-
box_plot: BoxPlot,
42+
top_k_frequent_elements: TopKFrequentElements = None,
43+
frequency_distribution: FrequencyDistribution = None,
44+
probability_distribution: ProbabilityDistribution = None,
45+
box_plot: BoxPlot = None,
4646
):
4747
self.feature_name: str = feature_name
4848
self.top_k_frequent_elements = top_k_frequent_elements
@@ -65,7 +65,7 @@ def from_json(cls, feature_name: str, json_dict: dict) -> "FeatureStatistics":
6565
BoxPlot.from_json(json_dict),
6666
)
6767
else:
68-
return None
68+
return cls(feature_name)
6969

7070
@property
7171
def __feature_stat_objects__(self) -> List[AbsFeatureStat]:
@@ -81,11 +81,18 @@ def __feature_stat_objects__(self) -> List[AbsFeatureStat]:
8181
]
8282

8383
def to_viz(self):
84+
# TODO: make it generic
85+
def next_graph_position_generator():
86+
yield 1
87+
yield 0
88+
yield 2
89+
8490
graph_count = len(self.__feature_stat_objects__)
8591
if graph_count > 0:
86-
fig = make_subplots(cols=graph_count, column_titles=["title"] * graph_count)
87-
for idx, stat in enumerate(
88-
[stat for stat in self.__feature_stat_objects__ if stat is not None]
92+
fig = make_subplots(cols=3, column_titles=[" "] * 3)
93+
for idx, stat in zip(
94+
next_graph_position_generator(),
95+
[stat for stat in self.__feature_stat_objects__ if stat is not None],
8996
):
9097
stat.add_to_figure(fig, idx, idx)
9198

ads/feature_store/statistics/statistics.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,5 @@ def to_viz(self, feature_list: List[str] = None):
3232
[
3333
FeatureStatistics.from_json(feature, stat).to_viz()
3434
for feature, stat in stats.items()
35-
if FeatureStatistics.from_json(feature, stat) is not None
36-
and (feature_list is None or feature in feature_list)
35+
if (feature_list is None or feature in feature_list)
3736
]

0 commit comments

Comments
 (0)