4
4
from sklearn .preprocessing import MinMaxScaler
5
5
from sklearn .ensemble import RandomForestClassifier
6
6
from sklearn .metrics import accuracy_score
7
+ import os
8
+ import numpy as np
9
+ import tensorflow as tf
7
10
8
- def test_cancer_risk ( ):
9
- data_df = pd .read_csv ('lung_cancer.csv' )
11
+ def sklearn_test ( attrib ):
12
+ data_df = pd .read_csv (os . path . join ( os . path . dirname ( __file__ ), 'lung_cancer.csv' ) )
10
13
data_df = data_df .replace ({'Level' : {'Low' : 0 , 'Medium' : 1 , 'High' : 2 }})
11
14
data_df = data_df .replace ({'Gender' : {2 : 0 }})
12
15
data_df = data_df .replace ({'Alcohol use' : {2 : 0 }})
@@ -28,37 +31,105 @@ def test_cancer_risk():
28
31
print ("Train test split complete!" )
29
32
30
33
scaler = MinMaxScaler ()
31
- x_train = scaler .fit_transform (x_train )
32
- x_test = scaler .transform (x_test )
34
+ x_train_norm = scaler .fit_transform (x_train )
35
+ x_test_norm = scaler .transform (x_test )
33
36
print ("Data transform complete!" )
34
37
35
- rfx = RandomForestClassifier (n_estimators = 500 )
38
+ rfx = RandomForestClassifier (n_estimators = 100 )
36
39
rfx .fit (x_train , y_train )
37
40
print (accuracy_score (y_test , rfx .predict (x_test )))
38
41
print ("Training classifier complete!" )
39
42
40
- x_test = x_test [: 10 ]
41
- y_test = rfx .predict (x_test [: 10 ])
43
+ test_instance = x_test_norm [ 10 ]
44
+ test_label = rfx .predict ([ x_test_norm [ 10 ]])[ 0 ]
42
45
cat_indices = [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 ]
46
+ imm_indices = [0 , 1 , 2 ]
47
+ discern = DisCERNTabular (rfx , attrib )
48
+ discern .init_data (x_train_norm , y_train , [c for c in df .columns if c != 'Level' ], ['Low' , 'Medium' , 'High' ], cat_feature_indices = cat_indices , immutable_feature_indices = imm_indices )
43
49
44
- sparsity = []
45
- proximity = []
46
- discern = DisCERNTabular (rfx , 'LIME' , 'Q' )
47
- discern .init_data (x_train , y_train , [c for c in df .columns if c != 'Level' ], ['Low' , 'Medium' , 'High' ], cat_feature_indices = cat_indices )
50
+ cf , cf_label , s , p = discern .find_cf (test_instance , test_label , cf_label = 0 )
51
+ print ('---------------------sklearn-' + attrib + '---------------------' )
52
+ print (cf , cf_label )
53
+ print (test_instance , test_label )
54
+ print ("Sparsity: " ,s , "Proximity: " , p )
48
55
49
- for idx in range (len (x_test )):
50
- if y_test [idx ] == 0 :
51
- continue
52
- cf , s , p = discern .find_cf (x_test [idx ], y_test [idx ], desired_class = 'Low' )
53
- print (s )
54
- print (p )
55
- sparsity .append (s )
56
- proximity .append (p )
57
56
58
- _sparsity = sum (sparsity )/ len (sparsity )
59
- _proximity = sum (proximity )/ (len (proximity )* _sparsity )
60
- print (_sparsity )
61
- print (_proximity )
57
def keras_test(attrib):
    """Train a small Keras classifier on the lung cancer data and print one counterfactual.

    Loads ``lung_cancer.csv`` from the directory containing this file, encodes
    the ``Level`` target as 0/1/2, recodes the value 2 to 0 in the binary
    columns, min-max scales the features, fits a two-hidden-layer softmax
    network, and asks ``DisCERNTabular`` for a counterfactual with label 0 for
    the test instance at index 12. Results are printed, nothing is returned.

    Args:
        attrib: Passed unchanged as the second argument of ``DisCERNTabular``;
            the calls in this file use 'LIME', 'SHAP' and 'IntG'.
    """
    # Columns in which the value 2 is recoded to 0.
    binary_columns = (
        'Gender',
        'Alcohol use',
        'Dust Allergy',
        'Smoking',
        'Chest Pain',
        'Fatigue',
        'Shortness of Breath',
        'Wheezing',
        'Swallowing Difficulty',
        'Cough',
        'chronic Lung Disease',
    )

    data_df = pd.read_csv(os.path.join(os.path.dirname(__file__), 'lung_cancer.csv'))
    data_df = data_df.replace({'Level': {'Low': 0, 'Medium': 1, 'High': 2}})
    for column in binary_columns:
        data_df = data_df.replace({column: {2: 0}})
    print("Reading data complete!")

    df = data_df.copy()
    feature_names = [c for c in df.columns if c != 'Level']
    num_classes = len(df['Level'].unique())
    x = df.loc[:, df.columns != 'Level'].values
    y = df['Level'].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=1)
    print("Train test split complete!")

    scaler = MinMaxScaler()
    x_train_norm = scaler.fit_transform(x_train)
    x_test_norm = scaler.transform(x_test)
    y_train_cat = tf.keras.utils.to_categorical(y_train, num_classes=num_classes, dtype='float32')
    y_test_cat = tf.keras.utils.to_categorical(y_test, num_classes=num_classes, dtype='float32')
    print("Data transform complete!")

    inputs = tf.keras.Input(shape=(x_train_norm.shape[-1],))
    hidden1 = tf.keras.layers.Dense(64, activation='relu')(inputs)
    hidden2 = tf.keras.layers.Dense(64, activation='relu')(hidden1)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(hidden2)

    model = tf.keras.Model(inputs=inputs, outputs=outputs, name="model")

    model.compile(
        loss='categorical_crossentropy',
        optimizer='Adam',
        metrics=['accuracy'])

    # Validate on the scaled test features: the model is trained on scaled
    # inputs, so the raw x_test would give meaningless validation metrics.
    model.fit(x_train_norm, y_train_cat, validation_data=(x_test_norm, y_test_cat),
              batch_size=32, epochs=5, verbose=0)
    print("Training classifier complete: ", accuracy_score(y_test, model.predict(x_test_norm).argmax(axis=-1)))

    test_instance = x_test_norm[12]
    test_label = model.predict(np.array([test_instance])).argmax(axis=-1)[0]

    cat_indices = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    imm_indices = [0, 1, 2]
    discern = DisCERNTabular(model, attrib)

    # The explainer is initialised with the model's own predictions on the
    # training set (not the ground-truth labels); predict once and reuse.
    train_pred = model.predict(x_train_norm).argmax(axis=-1)
    print('labels', set(train_pred))
    discern.init_data(x_train_norm, train_pred, feature_names, ['Low', 'Medium', 'High'],
                      cat_feature_indices=cat_indices, immutable_feature_indices=imm_indices)

    cf, cf_label, s, p = discern.find_cf(test_instance, test_label, cf_label=0)
    # Banner names the Keras run (the original said 'sklearn-' here).
    print('---------------------keras-' + attrib + '---------------------')
    print(cf, cf_label)
    print(test_instance, test_label)
    print("Sparsity: ", s, "Proximity: ", p)
63
115
64
- test_cancer_risk ()
116
# Run every classifier/attribution combination. Each run stays best-effort:
# a failure is reported and the remaining combinations still execute, but
# KeyboardInterrupt/SystemExit are no longer swallowed by a bare except.
for _runner, _attrib in (
    (sklearn_test, 'LIME'),
    (sklearn_test, 'SHAP'),
    (keras_test, 'LIME'),
    (keras_test, 'SHAP'),
    (keras_test, 'IntG'),
):
    try:
        _runner(_attrib)
    except Exception as exc:
        print('{}({!r}) failed: {!r}'.format(_runner.__name__, _attrib, exc))
0 commit comments