 import sys
 import pytest
 import time
+import numpy as np
+
+from trustyai.explainers import LimeExplainer, SHAPExplainer
+from trustyai.model import feature, PredictionInput
+from trustyai.utils import TestModels
+from trustyai.metrics.saliency import mean_impact_score, classification_fidelity, local_saliency_f1
+
+from org.kie.trustyai.explainability.model import (
+    PredictionInputsDataDistribution,
+)
 
 myPath = os.path.dirname(os.path.abspath(__file__))
 sys.path.insert(0, myPath + "/../general/")
 
 import test_counterfactualexplainer as tcf
-import test_limeexplainer as tlime
-
 
 @pytest.mark.benchmark(
     group="counterfactuals", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
@@ -35,9 +43,147 @@ def test_counterfactual_match_python_model(benchmark):
     """Counterfactual match (Python model)"""
     benchmark(tcf.test_counterfactual_match_python_model)
 
-# @pytest.mark.benchmark(
-#     group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
-# )
-# def test_non_empty_input(benchmark):
-#     """Counterfactual match (Python model)"""
-#     benchmark(tlime.test_non_empty_input)
+
+@pytest.mark.benchmark(
+    group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_sumskip_lime_impact_score_at_2(benchmark):
+    no_of_features = 10
+    np.random.seed(0)
+    explainer = LimeExplainer()
+    model = TestModels.getSumSkipModel(0)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = mean_impact_score(explainer, model, data)
+    benchmark(mean_impact_score, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="shap", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_sumskip_shap_impact_score_at_2(benchmark):
+    no_of_features = 10
+    np.random.seed(0)
+    background = []
+    for i in range(10):
+        background.append(PredictionInput([feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in range(no_of_features)]))
+    explainer = SHAPExplainer(background, samples=10000)
+    model = TestModels.getSumSkipModel(0)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = mean_impact_score(explainer, model, data)
+    benchmark(mean_impact_score, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_sumthreshold_lime_impact_score_at_2(benchmark):
+    no_of_features = 10
+    np.random.seed(0)
+    explainer = LimeExplainer()
+    center = 100.0
+    epsilon = 10.0
+    model = TestModels.getSumThresholdModel(center, epsilon)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = mean_impact_score(explainer, model, data)
+    benchmark(mean_impact_score, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="shap", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_sumthreshold_shap_impact_score_at_2(benchmark):
+    no_of_features = 10
+    np.random.seed(0)
+    background = []
+    for i in range(100):
+        background.append(PredictionInput([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)]))
+    explainer = SHAPExplainer(background, samples=10000)
+    center = 100.0
+    epsilon = 10.0
+    model = TestModels.getSumThresholdModel(center, epsilon)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = mean_impact_score(explainer, model, data)
+    benchmark(mean_impact_score, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_lime_fidelity(benchmark):
+    no_of_features = 10
+    np.random.seed(0)
+    explainer = LimeExplainer()
+    model = TestModels.getEvenSumModel(0)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)])
+    benchmark.extra_info['metric'] = classification_fidelity(explainer, model, data)
+    benchmark(classification_fidelity, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="shap", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_shap_fidelity(benchmark):
+    no_of_features = 10
+    np.random.seed(0)
+    background = []
+    for i in range(10):
+        background.append(PredictionInput(
+            [feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in
+             range(no_of_features)]))
+    explainer = SHAPExplainer(background, samples=10000)
+    model = TestModels.getEvenSumModel(0)
+    data = []
+    for i in range(100):
+        data.append([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in
+                     range(no_of_features)])
+    benchmark.extra_info['metric'] = classification_fidelity(explainer, model, data)
+    benchmark(classification_fidelity, explainer, model, data)
+
+
+@pytest.mark.benchmark(
+    group="lime", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_lime_local_saliency_f1(benchmark):
+    no_of_features = 10
+    np.random.seed(0)
+    explainer = LimeExplainer()
+    model = TestModels.getEvenSumModel(0)
+    output_name = "sum-even-but0"
+    data = []
+    for i in range(100):
+        data.append(PredictionInput([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)]))
+    distribution = PredictionInputsDataDistribution(data)
+    benchmark.extra_info['metric'] = local_saliency_f1(output_name, model, explainer, distribution, 2, 10)
+    benchmark(local_saliency_f1, output_name, model, explainer, distribution, 2, 10)
+
+
+@pytest.mark.benchmark(
+    group="shap", min_rounds=10, timer=time.time, disable_gc=True, warmup=True
+)
+def test_shap_local_saliency_f1(benchmark):
+    no_of_features = 10
+    np.random.seed(0)
+    background = []
+    for i in range(10):
+        background.append(PredictionInput(
+            [feature(name=f"f-num{i}", value=np.random.randint(-10, 10), dtype="number") for i in
+             range(no_of_features)]))
+    explainer = SHAPExplainer(background, samples=10000)
+    model = TestModels.getEvenSumModel(0)
+    output_name = "sum-even-but0"
+    data = []
+    for i in range(100):
+        data.append(PredictionInput([feature(name=f"f-num{i}", value=np.random.randint(-100, 100), dtype="number") for i in range(no_of_features)]))
+    distribution = PredictionInputsDataDistribution(data)
+    benchmark.extra_info['metric'] = local_saliency_f1(output_name, model, explainer, distribution, 2, 10)
+    benchmark(local_saliency_f1, output_name, model, explainer, distribution, 2, 10)
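These are ordinary pytest-benchmark tests: each function receives the benchmark fixture, is tagged with a group ("counterfactuals", "lime", or "shap"), and records the explainability metric it computes in benchmark.extra_info so the value is stored alongside the timing statistics rather than asserted on. A minimal sketch of running them programmatically, assuming the module above is saved as benchmark.py (the actual filename and path in the repository may differ) and pytest-benchmark is installed:

    import pytest

    # Collect only the benchmark tests from this module (the filename here is
    # an assumption) and write the timings, including each benchmark's
    # extra_info "metric" value, to a JSON report.
    pytest.main(["benchmark.py", "--benchmark-only", "--benchmark-json=results.json"])

The same flags work on the command line; since pytest-benchmark groups its summary table by the group names above, the LIME and SHAP runs appear side by side in the output.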