Skip to content

Commit a84f29b

Browse files
committed
Added unit tests for batch scoring in score code
1 parent a96c3ea commit a84f29b

File tree

2 files changed

+636 -46 lines changed

2 files changed

+636 -46 lines changed

src/sasctl/pzmm/write_score_code.py

Lines changed: 39 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1167,7 +1167,9 @@ def _no_targets_no_thresholds(
11671167
f"{'':4}if input_array.shape[0] == 1:\n"
11681168
f"{'':8}{metrics} = prediction[1][0]\n{'':8}return {metrics}\n"
11691169
f"{'':4}else:\n"
1170-
f"{'':8}return prediction.iloc[:, 0]\n"
1170+
f"{'':8}output_table = prediction.drop(prediction.columns[1:], axis=1)\n"
1171+
f"{'':8}output_table.columns = ['{metrics}']\n"
1172+
f"{'':8}return output_table"
11711173
)
11721174
"""
11731175
if input_array.shape[0] == 1:
@@ -1296,8 +1298,7 @@ def _binary_target(
12961298
f"{'':8}return {metrics}\n"
12971299
f"{'':4}else:\n"
12981300
f"{'':8}target_values = {target_values}\n"
1299-
f"{'':8}df = pd.DataFrame(prediction[1:], columns=prediction[0])\n"
1300-
f"{'':8}output_table = pd.DataFrame({{'{metrics}' : np.array(target_values)[np.argmax(df.iloc[0:, 1:].values, axis=1)]}})\n"
1301+
f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.array(target_values)[np.argmax(prediction.iloc[0:, 1:].values, axis=1)]}})\n"
13011302
f"{'':8}return output_table"
13021303
)
13031304
"""
@@ -1399,6 +1400,7 @@ def _binary_target(
13991400
else:
14001401
cls._invalid_predict_config()
14011402
elif len(metrics) == 2:
1403+
# TODO: change to align with other cases and assign target_values to classification column
14021404
# H2O models with two metrics are assumed to be classification + probability
14031405
if h2o_model:
14041406
warn(
@@ -1410,8 +1412,9 @@ def _binary_target(
14101412
f"{'':4}if input_array.shape[0] == 1:\n"
14111413
f"{'':8}return prediction[1][0], float(prediction[1][2])\n"
14121414
f"{'':4}else:\n"
1413-
f"{'':8}output_table = pd.DataFrame(prediction[1:], columns={metrics})\n"
1414-
f"{'':8}return output_table.drop('drop', axis=1)"
1415+
f"{'':8}output_table = prediction.drop(prediction.columns[1], axis=1)\n"
1416+
f"{'':8}output_table.columns = {metrics}\n"
1417+
f"{'':8}return output_table"
14151418
)
14161419
"""
14171420
if input_array.shape[0] == 1:
@@ -1464,9 +1467,9 @@ def _binary_target(
14641467
f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n"
14651468
f"{'':8}return {metrics[0]}, prediction[0]\n"
14661469
f"{'':4}else:\n"
1467-
f"{'':8}df = pd.DataFrame(prediction, columns = {target_values})\n"
1468-
f"{'':8}proba = df[:, 0]\n"
1469-
f"{'':8}classification = np.where(df[:, 0] > df[:, 1], '{target_values[0]}', '{target_values[1]}')\n"
1470+
f"{'':8}df = pd.DataFrame(prediction)\n"
1471+
f"{'':8}proba = df[0]\n"
1472+
f"{'':8}classifications = np.where(df[0] > df[1], '{target_values[0]}', '{target_values[1]}')\n"
14701473
f"{'':8}return pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': proba}})"
14711474
)
14721475
"""
@@ -1526,6 +1529,7 @@ def _binary_target(
15261529
f"{'':8}return prediction[{class_index}], prediction[0]\n"
15271530
f"{'':4}else:\n"
15281531
f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n"
1532+
f"{'':8}output_table = output_table[output_table.columns[::-1]]\n"
15291533
f"{'':8}return output_table.drop('drop', axis=1)")
15301534
"""
15311535
if input_array.shape[0] == 1:
@@ -1543,7 +1547,8 @@ def _binary_target(
15431547
f"{'':8}return prediction[1][0], float(prediction[1][1]), "
15441548
f"float(prediction[1][2])\n"
15451549
f"{'':4}else:\n"
1546-
f"{'':8}return pd.DataFrame(prediction[1:], columns={metrics})"
1550+
f"{'':8}prediction.columns = {metrics}\n"
1551+
f"{'':8}return prediction"
15471552
)
15481553
"""
15491554
if input_array.shape[0] == 1:
@@ -1599,7 +1604,7 @@ def _binary_target(
15991604
f"{'':8}return {metrics[0]}, prediction[0], prediction[1]\n"
16001605
f"{'':4}else:\n"
16011606
f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n"
1602-
f"{'':8}output_table = output_table.insert(0, '{metrics[0]}', np.array(target_values)[np.argmax(output_table.iloc[0:, 0:].values, axis=1)])\n"
1607+
f"{'':8}output_table.insert(0, '{metrics[0]}', np.array({target_values})[np.argmax(output_table.values, axis=1)])\n"
16031608
f"{'':8}return output_table"
16041609
)
16051610
"""
@@ -1643,7 +1648,8 @@ def _binary_target(
16431648
f"{'':8}return prediction[1], prediction[0], 1 - prediction[0]\n"
16441649
f"{'':4}else:\n"
16451650
f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n"
1646-
f"{'':8}output_table['{metrics[2]}'] = 1 - output_table['{metrics[0]}']\n"
1651+
f"{'':8}output_table = output_table[output_table.columns[::-1]]\n"
1652+
f"{'':8}output_table['{metrics[2]}'] = 1 - output_table['{metrics[1]}']\n"
16471653
f"{'':8}return output_table"
16481654
)
16491655
"""
@@ -1660,7 +1666,7 @@ def _binary_target(
16601666
f"{'':4}if input_array.shape[0] == 1:\n"
16611667
f"{'':8}return prediction[0], prediction[1], prediction[2]\n"
16621668
f"{'':4}else:\n"
1663-
f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics})"
1669+
f"{'':8}return pd.DataFrame(prediction, columns={metrics})"
16641670
)
16651671
"""
16661672
if input_array.shape[0] == 1:
@@ -1715,7 +1721,7 @@ def _nonbinary_targets(
17151721
f"index(max(prediction[1][1:]))]\n"
17161722
f"{'':8}return {metrics}\n"
17171723
f"{'':4}else:\n"
1718-
f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.array(target_values)[np.argmax(df.iloc[1:, 0:].values, axis=1)]}})\n"
1724+
f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.array(target_values)[np.argmax(prediction.iloc[:, 1:].values, axis=1)]}})\n"
17191725
f"{'':8}return output_table"
17201726
)
17211727
"""
@@ -1724,17 +1730,16 @@ def _nonbinary_targets(
17241730
classification_variable = target_values[prediction[1][1:].index(max(prediction[1][1:]))]
17251731
return classification_variable
17261732
else:
1727-
output_table = pd.DataFrame({'classification_variable': np.array(target_values)[np.argmax(df.iloc[1:, 0:].values, axis=1)]})
1733+
output_table = pd.DataFrame({'classification_variable': np.array(target_values)[np.argmax(df.iloc[:, 1:].values, axis=1)]})
17281734
return output_table
17291735
"""
17301736
# One return that is the classification
17311737
elif len(returns) == 1:
17321738
cls.score_code += (
17331739
f"{'':4}if input_array.shape[0] == 1:\n"
1734-
f"{'':8}{metrics} = prediction\n"
1735-
f"{'':8}return {metrics}\n"
1740+
f"{'':8}return prediction\n"
17361741
f"{'':4}else:\n"
1737-
f"{'':8}return pd.DataFrame('{metrics}': prediction)"
1742+
f"{'':8}return pd.DataFrame({{'{metrics}': prediction}})"
17381743
)
17391744
"""
17401745
if input_array.shape[0] == 1:
@@ -1749,7 +1754,7 @@ def _nonbinary_targets(
17491754
f"{'':8}target_values = {target_values}\n"
17501755
f"{'':8}return target_values[prediction.index(max(prediction))]\n"
17511756
f"{'':4}else:\n"
1752-
f"{'':8}output_table = pd.DataFrame({{'{metrics}' : np.array(target_values)[np.argmax(prediction, axis=1)]}})\n"
1757+
f"{'':8}output_table = pd.DataFrame({{'{metrics}' : np.array({target_values})[np.argmax(prediction, axis=1)]}})\n"
17531758
f"{'':8}return output_table"
17541759
)
17551760
"""
@@ -1767,7 +1772,7 @@ def _nonbinary_targets(
17671772
f"{'':4}if input_array.shape[0] == 1:\n"
17681773
f"{'':8}return prediction[{class_index}]\n"
17691774
f"{'':4}else:\n"
1770-
f"{'':8}return pd.DataFrame('{metrics}': [p[{class_index}] for p in prediction])"
1775+
f"{'':8}return pd.DataFrame({{'{metrics}': [p[{class_index}] for p in prediction]}})"
17711776
)
17721777
"""
17731778
if input_array.shape[0] == 1:
@@ -1786,9 +1791,8 @@ def _nonbinary_targets(
17861791
f"index(max(prediction[1][1:]))]\n"
17871792
f"{'':8}return {metrics[0]}, max(prediction[1][1:])\n"
17881793
f"{'':4}else:\n"
1789-
f"{'':8}df = pd.DataFrame(prediction[1:], columns=prediction[0])\n"
1790-
f"{'':8}index = np.argmax(df.iloc[0:, 1:].values, axis=1)\n"
1791-
f"{'':8}return pd.DataFrame({{'{metrics[0]}': np.array(target_values)[index], '{metrics[1]}': np.max(df.iloc[0:, 1:], axis=1)}})\n"
1794+
f"{'':8}index = np.argmax(prediction.iloc[0:, 1:].values, axis=1)\n"
1795+
f"{'':8}return pd.DataFrame({{'{metrics[0]}': np.array(target_values)[index], '{metrics[1]}': np.max(prediction.iloc[0:, 1:], axis=1)}})\n"
17921796
)
17931797
"""
17941798
target_values = [1, 2, 3]
@@ -1808,10 +1812,10 @@ def _nonbinary_targets(
18081812
f"{'':8}return target_values[prediction.index(max(prediction))], "
18091813
f"max(prediction)\n"
18101814
f"{'':4}else:\n"
1811-
f"{'':8}df = pd.DataFrame(prediction, columns = target_values)\n"
1812-
f"{'':8}index = np.argmax(df, axis=1)\n"
1813-
f"{'':8}classifications = [np.array(target_values)[index]]\n"
1814-
f"{'':8}max_proba = np.max(df, axis=1)\n"
1815+
f"{'':8}df = pd.DataFrame(prediction)\n"
1816+
f"{'':8}index = np.argmax(df.values, axis=1)\n"
1817+
f"{'':8}classifications = np.array(target_values)[index]\n"
1818+
f"{'':8}max_proba = np.max(df.values, axis=1)\n"
18151819
f"{'':8}return pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': max_proba}})"
18161820
)
18171821
"""
@@ -1835,7 +1839,7 @@ def _nonbinary_targets(
18351839
f"{'':4}else:\n"
18361840
f"{'':8}df = pd.DataFrame(prediction)\n"
18371841
f"{'':8}probas = df.drop({class_index}, axis=1)\n"
1838-
f"{'':8}max_proba = np.max(probas, axis=1)\n"
1842+
f"{'':8}max_proba = np.max(probas.values, axis=1)\n"
18391843
f"{'':8}return pd.DataFrame({{'{metrics[0]}': df[{class_index}], '{metrics[1]}': max_proba}})"
18401844
)
18411845
"""
@@ -1857,8 +1861,8 @@ def _nonbinary_targets(
18571861
f"{'':4}if input_array.shape[0] == 1:\n"
18581862
f"{'':8}return {', '.join(h2o_returns)}\n"
18591863
f"{'':4}else:\n"
1860-
f"{'':8}output_table = pd.DataFrame(prediction[1:], columns={metrics})\n"
1861-
f"{'':8}output_table = output_table.drop('{metrics[0]}', axis=1)]\n"
1864+
f"{'':8}output_table = prediction.drop(prediction.columns[0], axis=1)\n"
1865+
f"{'':8}output_table.columns = {metrics}\n"
18621866
f"{'':8}return output_table"
18631867
)
18641868
"""
@@ -1875,7 +1879,8 @@ def _nonbinary_targets(
18751879
f"{'':4}if input_array.shape[0] == 1:\n"
18761880
f"{'':8}return {', '.join(h2o_returns)}\n"
18771881
f"{'':4}else:\n"
1878-
f"{'':8}output_table = pd.DataFrame(prediction[1:], columns={metrics})"
1882+
f"{'':8}prediction.columns = {metrics}\n"
1883+
f"{'':8}return prediction"
18791884
)
18801885
"""
18811886
if input_array.shape[0] == 1:
@@ -1894,7 +1899,8 @@ def _nonbinary_targets(
18941899
f"{'':4}if input_array.shape[0] == 1:\n"
18951900
f"{'':8}return {', '.join(proba_returns)}\n"
18961901
f"{'':4}else:\n"
1897-
f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics})"
1902+
f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics})\n"
1903+
f"{'':8}return output_table"
18981904
)
18991905
"""
19001906
if input_array.shape[0] == 1:
@@ -1912,9 +1918,9 @@ def _nonbinary_targets(
19121918
f"{'':8}return target_values[prediction.index(max(prediction))], "
19131919
f"{', '.join(proba_returns)}\n"
19141920
f"{'':4}else:\n"
1915-
f"{'':8}classifications = [target_values[np.argmax(p)[0]] for p in prediction]\n"
19161921
f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[1:]})\n"
1917-
f"{'':8}output_table = output_table.insert(loc=0, column={metrics[0]}, data=np.array(target_values)[np.argmax(df.iloc[0:, 1:].values, axis=1)])\n"
1922+
f"{'':8}classifications = np.array(target_values)[np.argmax(output_table.values, axis=1)]\n"
1923+
f"{'':8}output_table.insert(0, '{metrics[0]}', classifications)\n"
19181924
f"{'':8}return output_table"
19191925
)
19201926
"""

0 commit comments

Comments
 (0)