# pylint: disable = import-error
from typing import List, Optional, Any, Union

+import numpy as np
import pandas as pd
from jpype import JInt
from org.kie.trustyai.explainability.metrics import FairnessMetrics

from trustyai.model import Value, PredictionProvider, Model
from trustyai.utils.data_conversions import (
-    pandas_to_trusty,
    OneOutputUnionType,
    one_output_convert,
+    to_trusty_dataframe,
)

ColumSelector = Union[List[int], List[str]]


def _column_selector_to_index(columns: ColumSelector, dataframe: pd.DataFrame):
+    """Convert a column selector (a list of column names or indices) into a list of column indices for the given dataframe"""
    if len(columns) == 0:
        raise ValueError("Must specify at least one column")

@@ -27,32 +29,40 @@ def _column_selector_to_index(columns: ColumSelector, dataframe: pd.DataFrame):


def statistical_parity_difference(
-    privileged: pd.DataFrame,
-    unprivileged: pd.DataFrame,
+    privileged: Union[pd.DataFrame, np.ndarray],
+    unprivileged: Union[pd.DataFrame, np.ndarray],
    favorable: OneOutputUnionType,
    outputs: Optional[List[int]] = None,
+    feature_names: Optional[List[str]] = None,
) -> float:
    """Calculate Statistical Parity Difference between privileged and unprivileged dataframes"""
    favorable_prediction_object = one_output_convert(favorable)
    return FairnessMetrics.groupStatisticalParityDifference(
-        pandas_to_trusty(privileged, outputs),
-        pandas_to_trusty(unprivileged, outputs),
+        to_trusty_dataframe(
+            data=privileged, outputs=outputs, feature_names=feature_names
+        ),
+        to_trusty_dataframe(
+            data=unprivileged, outputs=outputs, feature_names=feature_names
+        ),
        favorable_prediction_object.outputs,
    )


-# pylint: disable = line-too-long
+# pylint: disable = line-too-long, too-many-arguments
def statistical_parity_difference_model(
-    samples: pd.DataFrame,
+    samples: Union[pd.DataFrame, np.ndarray],
    model: Union[PredictionProvider, Model],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    favorable: OneOutputUnionType,
+    feature_names: Optional[List[str]] = None,
) -> float:
    """Calculate Statistical Parity Difference using a samples dataframe and a model"""
    favorable_prediction_object = one_output_convert(favorable)
    _privilege_values = [Value(v) for v in privilege_values]
-    _jsamples = pandas_to_trusty(samples, no_outputs=True)
+    _jsamples = to_trusty_dataframe(
+        data=samples, no_outputs=True, feature_names=feature_names
+    )
    return FairnessMetrics.groupStatisticalParityDifference(
        _jsamples,
        model,
@@ -63,32 +73,40 @@ def statistical_parity_difference_model(


def disparate_impact_ratio(
-    privileged: pd.DataFrame,
-    unprivileged: pd.DataFrame,
+    privileged: Union[pd.DataFrame, np.ndarray],
+    unprivileged: Union[pd.DataFrame, np.ndarray],
    favorable: OneOutputUnionType,
    outputs: Optional[List[int]] = None,
+    feature_names: Optional[List[str]] = None,
) -> float:
"""Calculate Disparate Impact Ration between privileged and unprivileged dataframes"""
    favorable_prediction_object = one_output_convert(favorable)
    return FairnessMetrics.groupDisparateImpactRatio(
-        pandas_to_trusty(privileged, outputs),
-        pandas_to_trusty(unprivileged, outputs),
+        to_trusty_dataframe(
+            data=privileged, outputs=outputs, feature_names=feature_names
+        ),
+        to_trusty_dataframe(
+            data=unprivileged, outputs=outputs, feature_names=feature_names
+        ),
        favorable_prediction_object.outputs,
    )


# pylint: disable = line-too-long
def disparate_impact_ratio_model(
-    samples: pd.DataFrame,
+    samples: Union[pd.DataFrame, np.ndarray],
    model: Union[PredictionProvider, Model],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    favorable: OneOutputUnionType,
+    feature_names: Optional[List[str]] = None,
) -> float:
"""Calculate Disparate Impact Ration using a samples dataframe and a model"""
    favorable_prediction_object = one_output_convert(favorable)
    _privilege_values = [Value(v) for v in privilege_values]
-    _jsamples = pandas_to_trusty(samples, no_outputs=True)
+    _jsamples = to_trusty_dataframe(
+        data=samples, no_outputs=True, feature_names=feature_names
+    )
    return FairnessMetrics.groupDisparateImpactRatio(
        _jsamples,
        model,
@@ -100,12 +118,13 @@ def disparate_impact_ratio_model(

# pylint: disable = too-many-arguments
def average_odds_difference(
-    test: pd.DataFrame,
-    truth: pd.DataFrame,
+    test: Union[pd.DataFrame, np.ndarray],
+    truth: Union[pd.DataFrame, np.ndarray],
    privilege_columns: ColumSelector,
    privilege_values: OneOutputUnionType,
    positive_class: List[Any],
    outputs: Optional[List[int]] = None,
+    feature_names: Optional[List[str]] = None,
) -> float:
"""Calculate Average Odds between two dataframes"""
    if test.shape != truth.shape:
@@ -117,23 +136,26 @@ def average_odds_difference(
    # determine privileged columns
    _privilege_columns = _column_selector_to_index(privilege_columns, test)
    return FairnessMetrics.groupAverageOddsDifference(
-        pandas_to_trusty(test, outputs),
-        pandas_to_trusty(truth, outputs),
+        to_trusty_dataframe(data=test, outputs=outputs, feature_names=feature_names),
+        to_trusty_dataframe(data=truth, outputs=outputs, feature_names=feature_names),
        _privilege_columns,
        _privilege_values,
        _positive_class,
    )


def average_odds_difference_model(
-    samples: pd.DataFrame,
+    samples: Union[pd.DataFrame, np.ndarray],
    model: Union[PredictionProvider, Model],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    positive_class: List[Any],
+    feature_names: Optional[List[str]] = None,
) -> float:
"""Calculate Average Odds for a sample dataframe using the provided model"""
-    _jsamples = pandas_to_trusty(samples, no_outputs=True)
+    _jsamples = to_trusty_dataframe(
+        data=samples, no_outputs=True, feature_names=feature_names
+    )
    _privilege_values = [Value(v) for v in privilege_values]
    _positive_class = [Value(v) for v in positive_class]
    # determine privileged columns
@@ -144,12 +166,13 @@ def average_odds_difference_model(


def average_predictive_value_difference(
-    test: pd.DataFrame,
-    truth: pd.DataFrame,
+    test: Union[pd.DataFrame, np.ndarray],
+    truth: Union[pd.DataFrame, np.ndarray],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    positive_class: List[Any],
    outputs: Optional[List[int]] = None,
+    feature_names: Optional[List[str]] = None,
) -> float:
    """Calculate Average Predictive Value Difference between two dataframes"""
    if test.shape != truth.shape:
@@ -160,8 +183,8 @@ def average_predictive_value_difference(
    _positive_class = [Value(v) for v in positive_class]
    _privilege_columns = _column_selector_to_index(privilege_columns, test)
    return FairnessMetrics.groupAveragePredictiveValueDifference(
-        pandas_to_trusty(test, outputs),
-        pandas_to_trusty(truth, outputs),
+        to_trusty_dataframe(data=test, outputs=outputs, feature_names=feature_names),
+        to_trusty_dataframe(data=truth, outputs=outputs, feature_names=feature_names),
        _privilege_columns,
        _privilege_values,
        _positive_class,
@@ -170,14 +193,14 @@ def average_predictive_value_difference(

# pylint: disable = line-too-long
def average_predictive_value_difference_model(
-    samples: pd.DataFrame,
+    samples: Union[pd.DataFrame, np.ndarray],
    model: Union[PredictionProvider, Model],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    positive_class: List[Any],
) -> float:
    """Calculate Average Predictive Value Difference for a sample dataframe using the provided model"""
-    _jsamples = pandas_to_trusty(samples, no_outputs=True)
+    _jsamples = to_trusty_dataframe(samples, no_outputs=True)
    _privilege_values = [Value(v) for v in privilege_values]
    _positive_class = [Value(v) for v in positive_class]
    # determine privileged columns
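
Usage note (not part of the diff): with `to_trusty_dataframe` accepting numpy arrays, the group metrics above can now be called on raw arrays, using the new `feature_names` argument to label the columns. A minimal sketch, assuming this module is importable as `trustyai.metrics.fairness.group` and that a plain value is accepted for `favorable`; all data values are illustrative:

```python
import numpy as np

from trustyai.metrics.fairness.group import statistical_parity_difference

# Illustrative toy data: column 0 is a protected attribute, column 1 the outcome.
privileged = np.array([[1, 1], [1, 1], [1, 0]])
unprivileged = np.array([[0, 1], [0, 0], [0, 0]])

spd = statistical_parity_difference(
    privileged=privileged,
    unprivileged=unprivileged,
    favorable=1,                           # outcome value considered favorable
    outputs=[1],                           # index of the outcome column
    feature_names=["gender", "approved"],  # labels for the numpy columns
)
print(spd)
```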
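The `_model` variants thread `feature_names` through in the same way. A sketch with a toy prediction function; the `Model` keyword arguments and the decision rule are assumptions for illustration, not taken from this diff:

```python
import numpy as np
import pandas as pd

from trustyai.model import Model
from trustyai.metrics.fairness.group import statistical_parity_difference_model

def predict(inputs: pd.DataFrame) -> pd.DataFrame:
    # Toy decision rule, for illustration only.
    return pd.DataFrame({"approved": (inputs["income"] > 50_000).astype(int)})

model = Model(predict, dataframe_input=True, output_names=["approved"])

samples = np.array([[30_000, 0], [60_000, 1], [45_000, 1]])  # [income, gender]
spd = statistical_parity_difference_model(
    samples=samples,
    model=model,
    privilege_columns=[1],  # index of the "gender" column
    privilege_values=[1],
    favorable=1,
    feature_names=["income", "gender"],
)
```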