|
| 1 | +"""Explainers.pdp module""" |
| 2 | + |
| 3 | +import math |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import pandas as pd |
| 6 | +from pandas.io.formats.style import Styler |
| 7 | + |
| 8 | +from jpype import ( |
| 9 | + JImplements, |
| 10 | + JOverride, |
| 11 | +) |
| 12 | + |
| 13 | +# pylint: disable = import-error |
| 14 | +from org.kie.trustyai.explainability.global_ import pdp |
| 15 | + |
| 16 | +# pylint: disable = import-error |
| 17 | +from org.kie.trustyai.explainability.model import ( |
| 18 | + PredictionProvider, |
| 19 | + PredictionInputsDataDistribution, |
| 20 | + PredictionOutput, |
| 21 | + Output, |
| 22 | + Type, |
| 23 | + Value, |
| 24 | +) |
| 25 | + |
| 26 | +from trustyai.utils.data_conversions import ManyInputsUnionType, many_inputs_convert |
| 27 | + |
| 28 | +from .explanation_results import ExplanationResults |
| 29 | + |
| 30 | + |
class PDPResults(ExplanationResults):
    """
    Results class for Partial Dependence Plots
    """

    def __init__(self, pdp_graphs):
        """
        Parameters
        ----------
        pdp_graphs:
            the partial dependence graphs; each graph is read through its
            ``getX()``, ``getY()``, ``getFeature()`` and ``getOutput()`` accessors
        """
        self.pdp_graphs = pdp_graphs

    def as_dataframe(self) -> pd.DataFrame:
        """
        Returns
        -------
        a pd.DataFrame with input values and feature name as
        columns and marginal feature outputs as rows
        """
        pdp_series_list = []
        for pdp_graph in self.pdp_graphs:
            inputs = [self._to_plottable(x) for x in pdp_graph.getX()]
            outputs = [self._to_plottable(y) for y in pdp_graph.getY()]
            pdp_dict = dict(zip(inputs, outputs))
            pdp_dict["feature"] = str(pdp_graph.getFeature().getName())
            pdp_series = pd.Series(index=inputs + ["feature"], data=pdp_dict)
            pdp_series_list.append(pdp_series)
        return pd.DataFrame(pdp_series_list)

    def as_html(self) -> Styler:
        """
        Returns
        -------
        Style object from the PDP pd.DataFrame (see as_dataframe)
        """
        return self.as_dataframe().style

    def plot(self, output_name=None, block=True) -> None:
        """
        Draw one subplot per partial dependence graph.

        Parameters
        ----------
        output_name: str
            name of the output to be plotted; when given, only the graphs
            whose output has this name are drawn
            Default to None (all graphs are drawn)
        block: bool
            whether the plotting operation
            should be blocking or not

        Notes
        -----
        When no graph is selected nothing is drawn and the method returns.
        """
        # Select the graphs first so that the figure holds exactly one axis
        # per plotted graph instead of leaving empty axes for filtered ones.
        selected = [
            pdp_graph
            for pdp_graph in self.pdp_graphs
            if output_name is None
            or output_name == str(pdp_graph.getOutput().getName())
        ]
        if not selected:
            return

        # squeeze=False always yields a 2D array of axes; without it a single
        # subplot is returned as a bare Axes object that cannot be indexed.
        fig, axs = plt.subplots(len(selected), constrained_layout=True, squeeze=False)
        for axis, pdp_graph in zip(axs[:, 0], selected):
            fig.suptitle(str(pdp_graph.getOutput().getName()))
            pdp_x = [self._to_plottable(x) for x in pdp_graph.getX()]
            pdp_y = [self._to_plottable(y) for y in pdp_graph.getY()]
            axis.plot(pdp_x, pdp_y)
            axis.set_title(
                str(pdp_graph.getFeature().getName()), loc="left", fontsize="small"
            )
            axis.grid()
        fig.supylabel("Partial Dependence Plot")
        plt.show(block=block)

    @staticmethod
    def _to_plottable(datum: Value):
        """
        Convert a Value to something that can be plotted.

        Returns
        -------
        the result of ``datum.asNumber()``, or the string form of
        ``datum.asString()`` when that number is NaN
        """
        plottable = datum.asNumber()
        if math.isnan(plottable):
            plottable = str(datum.asString())
        return plottable
| 105 | + |
| 106 | + |
| 107 | +# pylint: disable = too-few-public-methods |
class PDPExplainer:
    """
    Partial Dependence Plot explainer.
    See https://christophm.github.io/interpretable-ml-book/pdp.html
    """

    def __init__(self, config=None):
        """
        Parameters
        ----------
        config:
            the configuration handed to the underlying
            PartialDependencePlotExplainer; when None a default
            PartialDependencePlotConfig is created
        """
        effective_config = (
            pdp.PartialDependencePlotConfig() if config is None else config
        )
        self._explainer = pdp.PartialDependencePlotExplainer(effective_config)

    def explain(
        self, model: PredictionProvider, data: ManyInputsUnionType, num_outputs: int = 1
    ) -> PDPResults:
        """
        Compute the partial dependence graphs of a model.

        Parameters
        ----------
        model: PredictionProvider
            the model to explain
        data: ManyInputsUnionType
            the data used to calculate the PDP
        num_outputs: int
            the number of outputs to calculate the PDP for

        Returns
        -------
        pdp_results: PDPResults
            the partial dependence plots associated to the model outputs
        """
        converted_inputs = many_inputs_convert(data)
        provider_metadata = _PredictionProviderMetadata(converted_inputs, num_outputs)
        return PDPResults(
            self._explainer.explainFromMetadata(model, provider_metadata)
        )
| 140 | + |
| 141 | + |
@JImplements(
    "org.kie.trustyai.explainability.model.PredictionProviderMetadata", deferred=True
)
class _PredictionProviderMetadata:
    """
    Implementation of org.kie.trustyai.explainability.model.PredictionProviderMetadata interface
    """

    def __init__(self, data: list, size: int):
        """
        Parameters
        ----------
        data: list
            the inputs wrapped into a PredictionInputsDataDistribution
        size: int
            the size of the model output; one unnamed Output of
            Type.UNDEFINED is created per output
        """
        self.data = PredictionInputsDataDistribution(data)
        placeholder_outputs = [Output("", Type.UNDEFINED) for _ in range(size)]
        self.pred_out = PredictionOutput(placeholder_outputs)

    # pylint: disable = invalid-name
    @JOverride
    def getDataDistribution(self):
        """
        Returns
        --------
        the data distribution built from the inputs
        """
        return self.data

    # pylint: disable = invalid-name
    @JOverride
    def getInputShape(self):
        """
        Returns
        --------
        the result of sampling the data distribution
        """
        return self.data.sample()

    # pylint: disable = invalid-name
    @JOverride
    def getOutputShape(self):
        """
        Returns
        --------
        the PredictionOutput holding the placeholder outputs
        """
        return self.pred_out
0 commit comments