@@ -1174,6 +1174,7 @@ def calculate_model_statistics(
1174
1174
train_data : Union [DataFrame , List [list ], Type ["numpy.array" ]] = None ,
1175
1175
test_data : Union [DataFrame , List [list ], Type ["numpy.array" ]] = None ,
1176
1176
json_path : Union [str , Path , None ] = None ,
1177
+ target_type : str = "classification"
1177
1178
) -> Union [dict , None ]:
1178
1179
"""
1179
1180
Calculates fit statistics (including ROC and Lift curves) from datasets and then
@@ -1214,6 +1215,9 @@ def calculate_model_statistics(
1214
1215
Dataset pertaining to the test data. The default value is None.
1215
1216
json_path : str or Path, optional
1216
1217
Location for the output JSON files. The default value is None.
1218
+ target_type: str, optional
1219
+ Type of target the model is trying to find. Currently supports "classification"
1220
+ and "prediction" types. The default value is "classification".
1217
1221
1218
1222
Returns
1219
1223
-------
@@ -1260,18 +1264,26 @@ def calculate_model_statistics(
1260
1264
data ,
1261
1265
casout = {"name" : "assess_dataset" , "replace" : True , "caslib" : "Public" },
1262
1266
)
1263
-
1264
- conn .percentile .assess (
1265
- table = {"name" : "assess_dataset" , "caslib" : "Public" },
1266
- response = "predict" ,
1267
- pVar = "predict_proba" ,
1268
- event = str (target_value ),
1269
- pEvent = str (prob_value ) if prob_value else str (0.5 ),
1270
- inputs = "actual" ,
1271
- fitStatOut = {"name" : "FitStat" , "replace" : True , "caslib" : "Public" },
1272
- rocOut = {"name" : "ROC" , "replace" : True , "caslib" : "Public" },
1273
- casout = {"name" : "Lift" , "replace" : True , "caslib" : "Public" },
1274
- )
1267
+ if target_type == 'classification' :
1268
+ conn .percentile .assess (
1269
+ table = {"name" : "assess_dataset" , "caslib" : "Public" },
1270
+ response = "predict" ,
1271
+ pVar = "predict_proba" ,
1272
+ event = str (target_value ),
1273
+ pEvent = str (prob_value ) if prob_value else str (0.5 ),
1274
+ inputs = "actual" ,
1275
+ fitStatOut = {"name" : "FitStat" , "replace" : True , "caslib" : "Public" },
1276
+ rocOut = {"name" : "ROC" , "replace" : True , "caslib" : "Public" },
1277
+ casout = {"name" : "Lift" , "replace" : True , "caslib" : "Public" },
1278
+ )
1279
+ else :
1280
+ conn .percentile .assess (
1281
+ table = {"name" : "assess_dataset" , "caslib" : "Public" },
1282
+ response = "predict" ,
1283
+ inputs = "actual" ,
1284
+ fitStatOut = {"name" : "FitStat" , "replace" : True , "caslib" : "Public" },
1285
+ casout = {"name" : "Lift" , "replace" : True , "caslib" : "Public" }
1286
+ )
1275
1287
1276
1288
fitstat_dict = (
1277
1289
pd .DataFrame (conn .CASTable ("FitStat" , caslib = "Public" ).to_frame ())
@@ -1280,11 +1292,11 @@ def calculate_model_statistics(
1280
1292
.to_dict ()
1281
1293
)
1282
1294
json_dict [0 ]["data" ][i ]["dataMap" ].update (fitstat_dict )
1283
-
1284
- roc_df = pd .DataFrame (conn .CASTable ("ROC" , caslib = "Public" ).to_frame ())
1285
- roc_dict = cls .apply_dataframe_to_json (json_dict [1 ]["data" ], i , roc_df )
1286
- for j in range (len (roc_dict )):
1287
- json_dict [1 ]["data" ][j ].update (roc_dict [j ])
1295
+ if target_type == 'classification' :
1296
+ roc_df = pd .DataFrame (conn .CASTable ("ROC" , caslib = "Public" ).to_frame ())
1297
+ roc_dict = cls .apply_dataframe_to_json (json_dict [1 ]["data" ], i , roc_df )
1298
+ for j in range (len (roc_dict )):
1299
+ json_dict [1 ]["data" ][j ].update (roc_dict [j ])
1288
1300
1289
1301
lift_df = pd .DataFrame (conn .CASTable ("Lift" , caslib = "Public" ).to_frame ())
1290
1302
lift_dict = cls .apply_dataframe_to_json (json_dict [2 ]["data" ], i , lift_df , 1 )
@@ -1293,19 +1305,26 @@ def calculate_model_statistics(
1293
1305
1294
1306
if json_path :
1295
1307
for i , name in enumerate ([FITSTAT , ROC , LIFT ]):
1296
- with open (Path (json_path ) / name , "w" ) as json_file :
1297
- json_file .write (json .dumps (json_dict [i ], indent = 4 , cls = NpEncoder ))
1298
- if cls .notebook_output :
1299
- print (
1300
- f"{ name } was successfully written and saved to "
1301
- f"{ Path (json_path ) / name } "
1302
- )
1308
+ if not (name == ROC and target_type == "prediction" ):
1309
+ with open (Path (json_path ) / name , "w" ) as json_file :
1310
+ json_file .write (json .dumps (json_dict [i ], indent = 4 , cls = NpEncoder ))
1311
+ if cls .notebook_output :
1312
+ print (
1313
+ f"{ name } was successfully written and saved to "
1314
+ f"{ Path (json_path ) / name } "
1315
+ )
1303
1316
else :
1304
- return {
1305
- FITSTAT : json .dumps (json_dict [0 ], indent = 4 , cls = NpEncoder ),
1306
- ROC : json .dumps (json_dict [1 ], indent = 4 , cls = NpEncoder ),
1307
- LIFT : json .dumps (json_dict [2 ], indent = 4 , cls = NpEncoder ),
1308
- }
1317
+ if target_type == 'classification' :
1318
+ return {
1319
+ FITSTAT : json .dumps (json_dict [0 ], indent = 4 , cls = NpEncoder ),
1320
+ ROC : json .dumps (json_dict [1 ], indent = 4 , cls = NpEncoder ),
1321
+ LIFT : json .dumps (json_dict [2 ], indent = 4 , cls = NpEncoder ),
1322
+ }
1323
+ else :
1324
+ return {
1325
+ FITSTAT : json .dumps (json_dict [0 ], indent = 4 , cls = NpEncoder ),
1326
+ LIFT : json .dumps (json_dict [2 ], indent = 4 , cls = NpEncoder ),
1327
+ }
1309
1328
1310
1329
@staticmethod
1311
1330
def check_for_data (
@@ -2208,11 +2227,11 @@ def generate_model_card(
2208
2227
algorithm : str ,
2209
2228
train_data : pd .DataFrame ,
2210
2229
train_predictions : Union [pd .Series , list ],
2211
- target_type : str = "interval " ,
2230
+ target_type : str = "classificaiton " ,
2212
2231
target_value : Union [str , int , float , None ] = None ,
2213
2232
interval_vars : Optional [list ] = [],
2214
2233
class_vars : Optional [list ] = [],
2215
- selection_statistic : str = "_GINI_" ,
2234
+ selection_statistic : str = None ,
2216
2235
server : str = "cas-shared-default" ,
2217
2236
caslib : str = "Public" ,
2218
2237
):
@@ -2237,19 +2256,22 @@ def generate_model_card(
2237
2256
train_predictions : pandas.Series, list
2238
2257
List of predictions made by the model on the training data.
2239
2258
target_type : string
2240
- Type the model is targeting . Currently supports "classification" and "interval " types.
2241
- The default value is "Interval ".
2259
+ Type of target the model is trying to find. Currently supports "classification" and "prediction" types.
2260
+ The default value is "classification".
2242
2261
target_value : string, int, float, optional
2243
2262
Value the model is targeting for classification models. This argument is not needed for
2244
- Interval models. The default value is None.
2263
+ prediction models. The default value is None.
2245
2264
interval_vars : list, optional
2246
2265
A list of interval variables. The default value is an empty list.
2247
2266
class_vars : list, optional
2248
2267
A list of classification variables. The default value is an empty list.
2249
2268
selection_statistic: str, optional
2250
- The selection statistic chosen to score the model against other models. Can be any of the
2251
- following values: "_RASE_", "_NObs_", "_GINI_", "_GAMMA_", "_MCE_", "_ASE_", "_MCLL_",
2252
- "_KS_", "_KSPostCutoff_", "_DIV_", "_TAU_", "_KSCut_", or "_C_". The default value is "_GINI_".
2269
+ The selection statistic chosen to score the model against other models. Classification
2270
+ models can take any of the following values: "_RASE_", "_GINI_", "_GAMMA_", "_MCE_",
2271
+ "_ASE_", "_MCLL_", "_KS_", "_KSPostCutoff_", "_DIV_", "_TAU_", "_KSCut_", or "_C_".
2272
+ Prediction models can take any of the following values: "_ASE_", "_DIV_", "_RASE_", "_MAE_",
2273
+ "_RMAE_", "_MSLE_", or "_RMSLE_". The default value is "_KS_" for classification models and
2274
+ "_ASE_" for prediction models.
2253
2275
server: str, optional
2254
2276
The CAS server the training data will be stored on. The default value is "cas-shared-default".
2255
2277
caslib: str, optional
@@ -2260,10 +2282,15 @@ def generate_model_card(
2260
2282
"For the model card data to be properly generated on a classification "
2261
2283
"model, a target value is required."
2262
2284
)
2263
- if target_type not in ["classification" , "interval " ]:
2285
+ if target_type not in ["classification" , "prediction " ]:
2264
2286
raise RuntimeError (
2265
- "Only classification and interval target types are currently accepted."
2287
+ "Only classification and prediction target types are currently accepted."
2266
2288
)
2289
+ if selection_statistic is None :
2290
+ if target_type is 'classification' :
2291
+ selection_statistic = '_KS_'
2292
+ elif target_type is 'prediction' :
2293
+ selection_statistic = "_ASE_"
2267
2294
if selection_statistic not in cls .valid_params :
2268
2295
raise RuntimeError (
2269
2296
"The selection statistic must be a value generated in dmcas_fitstat.json. See "
@@ -2292,7 +2319,7 @@ def generate_model_card(
2292
2319
)
2293
2320
2294
2321
# Generates the event percentage for Classification targets, and the event average
2295
- # for Interval targets
2322
+ # for prediction targets
2296
2323
update_dict = cls .generate_outcome_average (
2297
2324
train_data = train_data ,
2298
2325
input_variables = interval_vars + class_vars ,
@@ -2373,7 +2400,7 @@ def generate_outcome_average(
2373
2400
target_value : Union [str , int , float ] = None
2374
2401
):
2375
2402
"""
2376
- Generates the outcome average of the training data. For Interval targets, the event average
2403
+ Generates the outcome average of the training data. For prediction targets, the event average
2377
2404
is generated. For Classification targets, the event percentage is returned.
2378
2405
2379
2406
Parameters
@@ -2385,10 +2412,10 @@ def generate_outcome_average(
2385
2412
input_variables: list
2386
2413
A list of all input variables used by the model. Used to isolate the output variable.
2387
2414
target_type : string
2388
- Type the model is targeting. Currently supports "Classification " and "Interval " types.
2415
+ Type the model is targeting. Currently supports "classification" and "prediction" types.
2389
2416
target_value : string, int, float, optional
2390
2417
Value the model is targeting for Classification models. This argument is not needed for
2391
- Interval models. The default value is None.
2418
+ prediction models. The default value is None.
2392
2419
2393
2420
Returns
2394
2421
-------
@@ -2400,7 +2427,7 @@ def generate_outcome_average(
2400
2427
if target_type == "classification" :
2401
2428
value_counts = output_var [output_var .columns [0 ]].value_counts ()
2402
2429
return {'eventPercentage' : value_counts [target_value ]/ sum (value_counts )}
2403
- elif target_type == "interval " :
2430
+ elif target_type == "prediction " :
2404
2431
if not isinstance (output_var [output_var .columns [0 ]].iloc [0 ], numbers .Number ):
2405
2432
raise ValueError ("Detected output column is not numeric. Please ensure that " +
2406
2433
"the correct output column is being passed, and that no extra columns " +
@@ -2515,7 +2542,7 @@ def generate_variable_importance(
2515
2542
model_files : Union [str , Path , dict ],
2516
2543
train_data : pd .DataFrame ,
2517
2544
train_predictions : Union [pd .Series , list ],
2518
- target_type : str = "interval " ,
2545
+ target_type : str = "classification " ,
2519
2546
interval_vars : Optional [list ] = [],
2520
2547
class_vars : Optional [list ] = [],
2521
2548
caslib : str = "Public" ,
@@ -2535,8 +2562,8 @@ def generate_variable_importance(
2535
2562
train_predictions : pandas.Series, list
2536
2563
List of predictions made by the model on the training data.
2537
2564
target_type : string, optional
2538
- Type the model is targeting. Currently supports "Classification " and "Interval " types.
2539
- The default value is "Interval ".
2565
+ Type the model is targeting. Currently supports "classification" and "prediction" types.
2566
+ The default value is "classification".
2540
2567
interval_vars : list, optional
2541
2568
A list of interval variables. The default value is an empty list.
2542
2569
class_vars : list, optional
@@ -2564,7 +2591,7 @@ def generate_variable_importance(
2564
2591
treeCrit = 'RSS'
2565
2592
else :
2566
2593
raise RuntimeError (
2567
- "The selected model type is unsupported. Currently, only models that have interval or classification target types are supported."
2594
+ "The selected model type is unsupported. Currently, only models that have prediction or classification target types are supported."
2568
2595
)
2569
2596
request_packages = list ()
2570
2597
if interval_vars :
0 commit comments