@@ -101,44 +101,27 @@ def _saliency_to_dataframe(self, saliency, output_name):
101
101
],
102
102
0 ,
103
103
).tolist ()
104
- feature_values = [
105
- pfi .getFeature ().getValue ().asNumber ()
106
- for pfi in saliency .getPerFeatureImportance ()[:- 1 ]
107
- ]
108
- shap_values = [
109
- pfi .getScore () for pfi in saliency .getPerFeatureImportance ()[:- 1 ]
110
- ]
111
- feature_names = [
112
- str (pfi .getFeature ().getName ())
113
- for pfi in saliency .getPerFeatureImportance ()[:- 1 ]
114
- ]
115
-
116
- columns = ["Mean Background Value" , "Feature Value" , "SHAP Value" ]
117
- visualizer_data_frame = pd .DataFrame (
118
- [background_mean_feature_values , feature_values , shap_values ],
119
- index = columns ,
120
- columns = feature_names ,
121
- ).T
122
- fnull = self .get_fnull ()[output_name ]
123
104
124
- return (
125
- pd .concat (
126
- [
127
- pd .DataFrame (
128
- [["-" , "-" , fnull ]], index = ["Background" ], columns = columns
129
- ),
130
- visualizer_data_frame ,
131
- pd .DataFrame (
132
- [[fnull , sum (shap_values ) + fnull , sum (shap_values ) + fnull ]],
133
- index = ["Prediction" ],
134
- columns = columns ,
135
- ),
136
- ]
137
- ),
138
- feature_names ,
139
- shap_values ,
140
- background_mean_feature_values ,
141
- )
105
+ data_rows = []
106
+ for i , pfi in enumerate (saliency .getPerFeatureImportance ()[:- 1 ]):
107
+ data_rows .append (
108
+ {
109
+ "Feature" : str (pfi .getFeature ().getName ().toString ()),
110
+ "Value" : pfi .getFeature ().getValue ().getUnderlyingObject (),
111
+ "Mean Background Value" : background_mean_feature_values [i ],
112
+ "SHAP Value" : pfi .getScore (),
113
+ "Confidence" : pfi .getConfidence (),
114
+ }
115
+ )
116
+
117
+ fnull = {
118
+ "Feature" : "Background" ,
119
+ "Value" : None ,
120
+ "Mean Background Value" : None ,
121
+ "SHAP Value" : self .get_fnull ()[output_name ],
122
+ }
123
+
124
+ return pd .DataFrame ([fnull ] + data_rows )
142
125
143
126
def as_dataframe (self ) -> Dict [str , pd .DataFrame ]:
144
127
"""
@@ -148,16 +131,18 @@ def as_dataframe(self) -> Dict[str, pd.DataFrame]:
148
131
-------
149
132
Dict[str, pandas.DataFrame]
150
133
Dictionary of DataFrames, keyed by output name, containing the results of the SHAP
151
- explanation. For each model output, the table will contain the following columns,
152
- indexed by feature name:
134
+ explanation. For each model output, the table will contain the following columns:
153
135
154
- * ``Mean Background Value ``: The mean value this feature took in the background
136
+ * ``Feature``: The name of the feature.
155
137
* ``Value``: The value of the feature for this particular input.
138
+ * ``Mean Background Value``: The mean value this feature took in the background data.
156
139
* ``SHAP Value``: The found SHAP value of this feature.
140
+ * ``Confidence``: The confidence of this explanation as returned by the explainer.
141
+
157
142
"""
158
143
df_dict = {}
159
144
for output_name , saliency in self .saliency_map ().items ():
160
- df_dict [output_name ] = self ._saliency_to_dataframe (saliency , output_name )[ 0 ]
145
+ df_dict [output_name ] = self ._saliency_to_dataframe (saliency , output_name )
161
146
return df_dict
162
147
163
148
def as_html (self ) -> Dict [str , pd .io .formats .style .Styler ]:
@@ -179,23 +164,21 @@ def as_html(self) -> Dict[str, pd.io.formats.style.Styler]:
179
164
def _color_feature_values (feature_values , background_vals ):
180
165
"""Internal function for the dataframe visualization"""
181
166
formats = []
182
- for i , feature_value in enumerate (feature_values [1 :- 1 ]):
167
+ for i , feature_value in enumerate (feature_values [1 :]):
183
168
if feature_value < background_vals [i ]:
184
169
formats .append (f"background-color:{ ds ['negative_primary_colour' ]} " )
185
170
elif feature_value > background_vals [i ]:
186
171
formats .append (f"background-color:{ ds ['positive_primary_colour' ]} " )
187
172
else :
188
173
formats .append (None )
189
- return [None ] + formats + [ None ]
174
+ return [None ] + formats
190
175
191
176
df_dict = {}
192
- for i , (output_name , saliency ) in enumerate (self .saliency_map ().items ()):
193
- (
194
- df ,
195
- feature_names ,
196
- shap_values ,
197
- background_mean_feature_values ,
198
- ) = self ._saliency_to_dataframe (saliency , i )
177
+ for output_name , saliency in self .saliency_map ().items ():
178
+ df = self ._saliency_to_dataframe (saliency , output_name )
179
+ shap_values = df ["SHAP Value" ].values [1 :]
180
+ background_mean_feature_values = df ["Mean Background Value" ].values [1 :]
181
+
199
182
style = df .style .background_gradient (
200
183
LinearSegmentedColormap .from_list (
201
184
name = "rwg" ,
@@ -205,15 +188,15 @@ def _color_feature_values(feature_values, background_vals):
205
188
ds ["positive_primary_colour" ],
206
189
],
207
190
),
208
- subset = (slice (feature_names [ 0 ], feature_names [ - 1 ] ), "SHAP Value" ),
191
+ subset = (slice (1 , None ), "SHAP Value" ),
209
192
vmin = - 1 * max (np .abs (shap_values )),
210
193
vmax = max (np .abs (shap_values )),
211
194
)
212
195
style .set_caption (f"Explanation of { output_name } " )
213
196
df_dict [output_name ] = style .apply (
214
197
_color_feature_values ,
215
198
background_vals = background_mean_feature_values ,
216
- subset = "Feature Value" ,
199
+ subset = "Value" ,
217
200
axis = 0 ,
218
201
)
219
202
return df_dict
0 commit comments