@@ -282,7 +282,7 @@ def score(var1, var2, var3, var4):
282
282
predict_method [1 ],
283
283
target_values = target_values ,
284
284
predict_threshold = predict_threshold ,
285
- target_index = target_index
285
+ target_index = target_index ,
286
286
)
287
287
288
288
if missing_values :
@@ -1090,7 +1090,7 @@ def _predictions_to_metrics(
1090
1090
target_values : Optional [List [str ]] = None ,
1091
1091
predict_threshold : Optional [float ] = None ,
1092
1092
h2o_model : Optional [bool ] = False ,
1093
- target_index : Optional [int ] = 1
1093
+ target_index : Optional [int ] = 1 ,
1094
1094
) -> None :
1095
1095
"""
1096
1096
Using the provided arguments, write in to the score code the method for handling
@@ -1136,7 +1136,12 @@ def _predictions_to_metrics(
1136
1136
# Binary classification model
1137
1137
elif len (target_values ) == 2 :
1138
1138
cls ._binary_target (
1139
- metrics , target_values , predict_returns , predict_threshold , target_index , h2o_model
1139
+ metrics ,
1140
+ target_values ,
1141
+ predict_returns ,
1142
+ predict_threshold ,
1143
+ target_index ,
1144
+ h2o_model ,
1140
1145
)
1141
1146
# Multiclass classification model
1142
1147
elif len (target_values ) > 2 :
@@ -1187,10 +1192,11 @@ def _no_targets_no_thresholds(
1187
1192
)
1188
1193
"""
1189
1194
if input_array.shape[0] == 1:
1190
- Classification = prediction[0]
1195
+ Classification = prediction[1][ 0]
1191
1196
return Classification
1192
1197
else:
1193
- output_table = pd.DataFrame({'Classification': prediction})
1198
+ output_table = prediction.drop(prediction.columns[1:], axis=1)
1199
+ output_table.columns = ['Classification']
1194
1200
return output_table
1195
1201
"""
1196
1202
else :
@@ -1203,11 +1209,10 @@ def _no_targets_no_thresholds(
1203
1209
)
1204
1210
"""
1205
1211
if input_array.shape[0] == 1:
1206
- Classification = prediction[1 ][0]
1212
+ Classification = prediction[0 ][0]
1207
1213
return Classification
1208
1214
else:
1209
- output_table = prediction.drop(prediction.columns[1:], axis=1)
1210
- output_table.columns = ['Classification']
1215
+ output_table = pd.DataFrame({'Classification': prediction})
1211
1216
return output_table
1212
1217
"""
1213
1218
else :
@@ -1271,7 +1276,7 @@ def _binary_target(
1271
1276
returns : List [Any ],
1272
1277
threshold : Optional [float ] = None ,
1273
1278
h2o_model : Optional [bool ] = None ,
1274
- target_index : Optional [int ] = 1
1279
+ target_index : Optional [int ] = 1 ,
1275
1280
) -> None :
1276
1281
"""
1277
1282
Handle binary model prediction outputs.
@@ -1317,13 +1322,13 @@ def _binary_target(
1317
1322
if h2o_model :
1318
1323
cls .score_code += (
1319
1324
f"{ '' :4} if input_array.shape[0] == 1:\n "
1320
- f"{ '' :8} if prediction[1][{ target_index } + 1 ] > { threshold } :\n "
1325
+ f"{ '' :8} if prediction[1][{ target_index + 1 } ] > { threshold } :\n "
1321
1326
f"{ '' :12} { metrics } = \" { target_values [target_index ]} \" \n "
1322
1327
f"{ '' :8} else:\n "
1323
1328
f"{ '' :12} { metrics } = \" { target_values [abs (target_index - 1 )]} \" \n "
1324
1329
f"{ '' :8} return { metrics } \n "
1325
1330
f"{ '' :4} else:\n "
1326
- f"{ '' :8} output_table = pd.DataFrame({{'{ metrics } ': np.where(prediction[prediction.columns[{ target_index + 1 } ]] > { threshold } , '{ target_values [target_index ]} ', '{ target_values [abs (target_index - 1 )]} ')}})"
1331
+ f"{ '' :8} output_table = pd.DataFrame({{'{ metrics } ': np.where(prediction[prediction.columns[{ target_index + 1 } ]] > { threshold } , '{ target_values [target_index ]} ', '{ target_values [abs (target_index - 1 )]} ')}})\n "
1327
1332
f"{ '' :8} return output_table"
1328
1333
)
1329
1334
"""
@@ -1385,8 +1390,8 @@ def _binary_target(
1385
1390
f"{ '' :8} return { metrics } \n "
1386
1391
f"{ '' :4} else:\n "
1387
1392
f"{ '' :8} target_values = { target_values } \n "
1388
- f"{ '' :8} prediction = pd.DataFrame(prediction)"
1389
- f"{ '' :8} output_table = pd.DataFrame({{'{ metrics } ': np.where(prediction[prediction.columns[{ target_index } ]] > { threshold } , '{ target_values [target_index ]} ', '{ target_values [abs (target_index - 1 )]} ')}})"
1393
+ f"{ '' :8} prediction = pd.DataFrame(prediction)\n "
1394
+ f"{ '' :8} output_table = pd.DataFrame({{'{ metrics } ': np.where(prediction[prediction.columns[{ target_index } ]] > { threshold } , '{ target_values [target_index ]} ', '{ target_values [abs (target_index - 1 )]} ')}})\n "
1390
1395
f"{ '' :8} return output_table"
1391
1396
)
1392
1397
"""
@@ -1404,6 +1409,7 @@ def _binary_target(
1404
1409
"""
1405
1410
# Classification and probability returned; return classification value
1406
1411
elif len (returns ) > 1 and sum (returns ) == 1 :
1412
+ # TODO: Either figure out how to handle threshold or add warning
1407
1413
# Determine which return is the classification value
1408
1414
class_index = [i for i , x in enumerate (returns ) if x ][0 ]
1409
1415
cls .score_code += (
@@ -1424,7 +1430,6 @@ def _binary_target(
1424
1430
else :
1425
1431
cls ._invalid_predict_config ()
1426
1432
elif len (metrics ) == 2 :
1427
- # TODO: change to align with other cases and assign target_values to classification column
1428
1433
# H2O models with two metrics are assumed to be classification + probability
1429
1434
if h2o_model :
1430
1435
warn (
@@ -1434,10 +1439,16 @@ def _binary_target(
1434
1439
)
1435
1440
cls .score_code += (
1436
1441
f"{ '' :4} if input_array.shape[0] == 1:\n "
1437
- f"{ '' :8} return prediction[1][0], float(prediction[1][2])\n "
1442
+ f"{ '' :8} if prediction[1][{ target_index + 1 } ] > { threshold } :\n "
1443
+ f"{ '' :12} { metrics [0 ]} = '{ target_values [target_index ]} '\n "
1444
+ f"{ '' :8} else:\n "
1445
+ f"{ '' :12} { metrics [0 ]} = '{ target_values [abs (target_index - 1 )]} '\n "
1446
+ f"{ '' :8} return { metrics [0 ]} , float(prediction[1][{ target_index + 1 } ])\n "
1438
1447
f"{ '' :4} else:\n "
1439
1448
f"{ '' :8} output_table = prediction.drop(prediction.columns[{ abs (target_index - 1 )+ 1 } ], axis=1)\n "
1449
+ f"{ '' :8} classifications = np.where(prediction[prediction.columns[{ target_index + 1 } ]] > { threshold } , '{ target_values [target_index ]} ', '{ target_values [abs (target_index - 1 )]} ')\n "
1440
1450
f"{ '' :8} output_table.columns = { metrics } \n "
1451
+ f"{ '' :8} output_table['{ metrics [0 ]} '] = classifications\n "
1441
1452
f"{ '' :8} return output_table"
1442
1453
)
1443
1454
"""
@@ -1494,7 +1505,7 @@ def _binary_target(
1494
1505
f"{ '' :8} return { metrics [0 ]} , prediction[0][{ target_index } ]\n "
1495
1506
f"{ '' :4} else:\n "
1496
1507
f"{ '' :8} df = pd.DataFrame(prediction)\n "
1497
- f"{ '' :8} proba = df[0 ]\n "
1508
+ f"{ '' :8} proba = df[{ target_index } ]\n "
1498
1509
f"{ '' :8} classifications = np.where(df[{ target_index } ] > { threshold } , '{ target_values [target_index ]} ', '{ target_values [abs (target_index - 1 )]} ')\n "
1499
1510
f"{ '' :8} return pd.DataFrame({{'{ metrics [0 ]} ': classifications, '{ metrics [1 ]} ': proba}})"
1500
1511
)
@@ -1511,6 +1522,7 @@ def _binary_target(
1511
1522
classifications = np.where(df[1] > .5, 'B', 'A')
1512
1523
return pd.DataFrame({'Classification': classifications, 'Probability': proba})
1513
1524
"""
1525
+ # TODO: Potentially add threshold
1514
1526
# Return classification and probability value
1515
1527
elif sum (returns ) == 1 and len (returns ) == 2 :
1516
1528
cls .score_code += (
@@ -1536,10 +1548,11 @@ def _binary_target(
1536
1548
if class_index == 0 :
1537
1549
cls .score_code += (
1538
1550
f"{ '' :4} if input_array.shape[0] == 1:\n "
1539
- f"{ '' :8} return prediction[0][0], prediction[0][{ target_index } + 1 ]\n "
1551
+ f"{ '' :8} return prediction[0][0], prediction[0][{ target_index + 1 } ]\n "
1540
1552
f"{ '' :4} else:\n "
1553
+ f"{ '' :8} prediction = pd.DataFrame(prediction)\n "
1541
1554
f"{ '' :8} output_table = prediction.drop(prediction.columns[{ abs (target_index - 1 )+ 1 } ], axis=1)\n "
1542
- f"{ '' :8} output_table.columns = { metrics } "
1555
+ f"{ '' :8} output_table.columns = { metrics } \n "
1543
1556
f"{ '' :8} return output_table"
1544
1557
)
1545
1558
@@ -1556,9 +1569,10 @@ def _binary_target(
1556
1569
f"{ '' :4} if input_array.shape[0] == 1:\n "
1557
1570
f"{ '' :8} return prediction[0][{ class_index } ], prediction[0][{ target_index } ]\n "
1558
1571
f"{ '' :4} else:\n "
1572
+ f"{ '' :8} prediction = pd.DataFrame(prediction)\n "
1559
1573
f"{ '' :8} output_table = prediction.drop(prediction.columns[{ abs (target_index - 1 )} ], axis=1)\n "
1560
1574
f"{ '' :8} output_table = output_table[output_table.columns[::-1]]\n "
1561
- f"{ '' :8} output_table.columns = { metrics } "
1575
+ f"{ '' :8} output_table.columns = { metrics } \n "
1562
1576
f"{ '' :8} return output_table"
1563
1577
)
1564
1578
"""
@@ -1622,6 +1636,7 @@ def _binary_target(
1622
1636
return output_table
1623
1637
"""
1624
1638
elif sum (returns ) == 0 and len (returns ) == 2 :
1639
+ # TODO: Make decision on whether ordering should follow given pattern or reflect input ordering
1625
1640
warn (
1626
1641
"Due to the ambiguity of the provided metrics and prediction return"
1627
1642
" types, the score code assumes the return order to be: "
@@ -1638,8 +1653,8 @@ def _binary_target(
1638
1653
f"{ '' :8} return { metrics [0 ]} , prediction[0][{ target_index } ], prediction[0][{ abs (target_index - 1 )} ]\n "
1639
1654
f"{ '' :4} else:\n "
1640
1655
f"{ '' :8} output_table = pd.DataFrame(prediction, columns={ metrics [1 :]} )\n "
1641
- f"{ '' :8} classifications = np.where(prediction[prediction .columns[{ target_index } ]] > { threshold } , '{ target_values [target_index ]} ', '{ target_values [abs (target_index - 1 )]} ')"
1642
- f"{ '' :8} output_table.insert(loc=0, column='{ metrics [0 ]} ', value=classifications)"
1656
+ f"{ '' :8} classifications = np.where(output_table[output_table .columns[{ target_index } ]] > { threshold } , '{ target_values [target_index ]} ', '{ target_values [abs (target_index - 1 )]} ')\n "
1657
+ f"{ '' :8} output_table.insert(loc=0, column='{ metrics [0 ]} ', value=classifications)\n "
1643
1658
f"{ '' :8} return output_table"
1644
1659
)
1645
1660
"""
0 commit comments