Skip to content

Commit dfe033c

Browse files
committed
began process of making batch-friendly score code
1 parent 21a8237 commit dfe033c

File tree

1 file changed

+122
-42
lines changed

1 file changed

+122
-42
lines changed

src/sasctl/pzmm/write_score_code.py

Lines changed: 122 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -260,13 +260,13 @@ def score(var1, var2, var3, var4):
260260
f"\n{'':4}# Check for numpy values and convert to a CAS readable "
261261
f"representation\n"
262262
f"{'':4}if isinstance(prediction, np.ndarray):\n"
263-
f"{'':8}prediction = prediction.tolist()[0]\n\n"
263+
f"{'':8}prediction = prediction.tolist()\n\n"
264264
)
265265
"""
266266
267267
# Check for numpy values and convert to a CAS readable representation
268268
if isinstance(prediction, np.ndarray):
269-
prediction = prediction.tolist()[0]
269+
prediction = prediction.tolist()
270270
271271
272272
"""
@@ -1164,43 +1164,74 @@ def _no_targets_no_thresholds(
11641164
# Classification (with only classification output) or prediction model
11651165
if h2o_model:
11661166
cls.score_code += (
1167-
f"{'':4}{metrics} = prediction[1][0]\n\n{'':4}return {metrics}"
1167+
f"{'':4}if input_array.shape[0] == 1\n"
1168+
f"{'':8}{metrics[0]} = prediction[1][0]\n\n{'':8}return {metrics[0]}"
1169+
f"{'':4}else:\n"
1170+
f"{'':8}prediction = pd.Dataframe(prediction[1:], columns=prediction[0])\n"
1171+
f"{'':8}{metrics[0]} = prediction['predict']\n\n"
1172+
f"{'':8}return {metrics[0]}"
11681173
)
11691174
"""
11701175
output_variable = predictions[1][0]
11711176
return output_variable
11721177
"""
11731178
else:
11741179
cls.score_code += (
1175-
f"{'':4}{metrics} = prediction\n\n{'':4}return {metrics}"
1180+
f"{'':4}if input_array.shape[0] == 1\n"
1181+
f"{'':8}{metrics} = prediction[0]\n\n{'':8}return {metrics}"
1182+
f"{'':4}else:\n"
1183+
f"{'':8}output_table = pd.DataFrame('{metrics}': prediction)\n\n{'':8}return output_table"
11761184
)
11771185
"""
1178-
output_variable = prediction
1179-
return output_variable
1186+
if input_array.shape[0] == 1:
1187+
output_variable = prediction[0]
1188+
return output_variable
1189+
else:
1190+
output_table = pd.DataFrame({'output_variable': prediction})
1191+
return output_table
11801192
"""
11811193
else:
11821194
# Classification model including predictions and classification
11831195
if h2o_model:
1184-
cls.score_code += f"{'':4}{metrics[0]} = prediction[1][0]\n"
1196+
cls.score_code += (
1197+
f"{'':4}if input_array.shape[0] == 1\n"
1198+
f"{'':8}{metrics[0]} = prediction[1][0]\n")
11851199
for i in range(len(metrics) - 1):
11861200
cls.score_code += (
1187-
f"{'':4}{metrics[i + 1]} = float(prediction[1][{i + 1}])\n"
1201+
f"{'':8}{metrics[i + 1]} = float(prediction[1][{i + 1}])\n"
11881202
)
1203+
cls.score_code += (
1204+
f"{'':4}else:\n"
1205+
f"{'':8}output_table = pd.DataFrame(prediction[1:], columns=[{','.join(metrics)}])\n"
1206+
f"{'':8}return output_table\n"
1207+
)
11891208
"""
1190-
classification_variable = prediction[1][0]
1191-
prediction_variable_1 = float(prediction[1][1])
1192-
prediction_variable_2 = float(prediction[1][2])
1209+
if input_array.shape[0] == 1:
1210+
classification_variable = prediction[1][0]
1211+
prediction_variable_1 = float(prediction[1][1])
1212+
prediction_variable_2 = float(prediction[1][2])
1213+
else:
1214+
output_table = pd.DataFrame(prediction[1:], columns=[classification_variable,variable_1,variable_2])
1215+
return output_table
11931216
"""
11941217
else:
1218+
cls.score_code += f"{'':4}if input_array.shape[0] == 1:\n"
11951219
for i in range(len(metrics)):
1196-
cls.score_code += f"{'':4}{metrics[i]} = prediction[{i}]\n"
1220+
cls.score_code += f"{'':8}{metrics[i]} = prediction[{i}]\n"
1221+
#TODO: What is the use case for this? unsure of how batched scoring would work here
1222+
cls.score_code += (
1223+
f"{'':4}else:\n"
1224+
f"{'':8}output_table = pd.DataFrame(prediction, columns=[{','.join(metrics)}])\n"
1225+
f"{'':8}return output_table\n"
1226+
)
11971227
cls.score_code += f"\n{'':4}return {', '.join(metrics)}"
11981228
"""
11991229
classification_variable = prediction[0]
12001230
prediction_variable_1 = prediction[1]
12011231
prediction_variable_2 = prediction[2])
12021232
"""
12031233

1234+
12041235
@classmethod
12051236
def _binary_target(
12061237
cls,
@@ -1250,11 +1281,16 @@ def _binary_target(
12501281
# For h2o models with only one metric provided, return the classification
12511282
if h2o_model:
12521283
cls.score_code += (
1253-
f"{'':4}if prediction[1][2] > {threshold}:\n"
1254-
f"{'':8}{metrics} = \"{target_values[0]}\"\n"
1284+
f"{'':4}if input_array.shape[0] == 1\n"
1285+
f"{'':8}if prediction[1][2] > {threshold}:\n"
1286+
f"{'':12}{metrics} = \"{target_values[0]}\"\n"
1287+
f"{'':8}else:\n"
1288+
f"{'':12}{metrics} = \"{target_values[1]}\"\n"
1289+
f"{'':8}return {metrics}\n"
12551290
f"{'':4}else:\n"
1256-
f"{'':8}{metrics} = \"{target_values[1]}\"\n\n"
1257-
f"{'':4}return {metrics}"
1291+
f"{'':8}output_table = pd.DataFrame(prediction[1:], columns=prediction[0])\n"
1292+
f"{'':8}output_table = pd.DataFrame({{'test': output_table['p1'].map(lambda x: float(x) > {threshold}).astype(int)}})\n"
1293+
f"{'':8}return output_table"
12581294
)
12591295
"""
12601296
if prediction[1][2] > 0.5:
@@ -1266,18 +1302,25 @@ def _binary_target(
12661302
"""
12671303
# One return that is the classification
12681304
elif len(returns) == 1 and returns[0]:
1269-
cls.score_code += f"{'':4}return prediction"
1305+
cls.score_code += (
1306+
f"{'':4}if input_array.shape[0] == 1\n"
1307+
f"{'':8}return prediction\n"
1308+
f"{'':4}else:\n"
1309+
f"{'':8}return pd.DataFrame({{'{metrics}': prediction}})")
12701310
"""
12711311
return prediction
12721312
"""
12731313
# One return that is a probability
12741314
elif len(returns) == 1 and not returns[0]:
12751315
cls.score_code += (
1276-
f"{'':4}if prediction > {threshold}:\n"
1277-
f"{'':8}{metrics} = \"{target_values[0]}\"\n"
1316+
f"{'':4}if input_array.shape[0] == 1\n"
1317+
f"{'':8}if prediction > {threshold}:\n"
1318+
f"{'':12}{metrics} = \"{target_values[0]}\"\n"
1319+
f"{'':8}else:\n"
1320+
f"{'':12}{metrics} = \"{target_values[1]}\"\n"
1321+
f"{'':8}return {metrics}\n"
12781322
f"{'':4}else:\n"
1279-
f"{'':8}{metrics} = \"{target_values[1]}\"\n\n"
1280-
f"{'':4}return {metrics}"
1323+
f"{'':8}return pd.DataFrame({{'{metrics}': ['{target_values[0]}' if p > {threshold} else '{target_values[1]}' for p in prediction]}})\n"
12811324
)
12821325
"""
12831326
if prediction > 0.5:
@@ -1291,11 +1334,15 @@ def _binary_target(
12911334
elif len(returns) == 2 and sum(returns) == 0:
12921335
# Only probabilities returned; return classification for larger value
12931336
cls.score_code += (
1294-
f"{'':4}if prediction[0] > prediction[1]:\n"
1295-
f"{'':8}{metrics} = \"{target_values[0]}\"\n"
1337+
f"{'':4}if input_array.shape[0] == 1\n"
1338+
f"{'':8}if prediction[0] > prediction[1]:\n"
1339+
f"{'':12}{metrics} = \"{target_values[0]}\"\n"
1340+
f"{'':8}else:\n"
1341+
f"{'':12}{metrics} = \"{target_values[1]}\"\n\n"
1342+
f"{'':8}return {metrics}\n"
12961343
f"{'':4}else:\n"
1297-
f"{'':8}{metrics} = \"{target_values[1]}\"\n\n"
1298-
f"{'':4}return {metrics}"
1344+
f"{'':8}classifications = np.argmax(prediction, axis=1)\n"
1345+
f"{'':8}reutrn pd.DataFrame({{'{metrics}': ['{target_values[1]}' if c == 0 else '{target_values[0]}' for c in classfications]}})"
12991346
)
13001347
"""
13011348
if prediction[0] > prediction[1]:
@@ -1310,7 +1357,11 @@ def _binary_target(
13101357
# Determine which return is the classification value
13111358
class_index = [i for i, x in enumerate(returns) if x][0]
13121359
cls.score_code += (
1313-
f"{'':4}{metrics} = prediction[{class_index}]\n\nreturn {metrics}"
1360+
f"{'':4}if input_array.shape[0] == 1\n"
1361+
f"{'':8}{metrics} = prediction[{class_index}]\n\nreturn {metrics}\n"
1362+
f"{'':4}else:\n"
1363+
f"{'':8}output_table = pd.DataFrame({{'{metrics}': [p[{class_index}] for p in prediction]}})\n"
1364+
f"{'':8}return output_table"
13141365
)
13151366
"""
13161367
classification_variable = prediction[2]
@@ -1328,7 +1379,11 @@ def _binary_target(
13281379
"the target event to occur."
13291380
)
13301381
cls.score_code += (
1331-
f"{'':4}return prediction[1][0], float(prediction[1][2])"
1382+
f"{'':4}if input_array.shape[0] == 1\n"
1383+
f"{'':8}return prediction[1][0], float(prediction[1][2])"
1384+
f"{'':4}else:\n"
1385+
f"{'':8}output_table = pd.DataFrame(prediction[1:], columns=[{',drop,'.join(metrics)}])\n"
1386+
f"{'':8}return output_table.drop('drop', axis=1)"
13321387
)
13331388
"""
13341389
return prediction[1][0], float(prediction[1][2])
@@ -1341,11 +1396,15 @@ def _binary_target(
13411396
"metric is returned first."
13421397
)
13431398
cls.score_code += (
1344-
f"{'':4}if prediction > {threshold}:\n"
1345-
f"{'':8}{metrics[0]} = \"{target_values[0]}\"\n"
1399+
f"{'':4}if input_array.shape[0] == 1\n"
1400+
f"{'':8}if prediction > {threshold}:\n"
1401+
f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n"
1402+
f"{'':8}else:\n"
1403+
f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n\n"
1404+
f"{'':8}return {metrics[0]}, prediction\n"
13461405
f"{'':4}else:\n"
1347-
f"{'':8}{metrics[0]} = \"{target_values[1]}\"\n\n"
1348-
f"{'':4}return {metrics[0]}, prediction"
1406+
f"{'':8}classifications = ['{target_values[0]}' if p > {threshold} else '{target_values[1]}' for p in prediction]\n"
1407+
f"{'':8}return pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': prediction}})"
13491408
)
13501409
"""
13511410
if prediction > 0.5:
@@ -1363,14 +1422,18 @@ def _binary_target(
13631422
"target event probability should be returned."
13641423
)
13651424
cls.score_code += (
1366-
f"{'':4}if prediction[0] > prediction[1]:\n"
1367-
f"{'':8}{metrics[0]} = \"{target_values[0]}\"\n"
1425+
f"{'':4}if input_array.shape[0] == 1\n"
1426+
f"{'':8}if prediction[0] > prediction[1]:\n"
1427+
f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n"
1428+
f"{'':8}else:\n"
1429+
f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n\n"
1430+
f"{'':8}return {metrics[0]}, prediction[0]\n"
13681431
f"{'':4}else:\n"
1369-
f"{'':8}{metrics[0]} = \"{target_values[1]}\"\n\n"
1370-
f"{'':4}return {metrics[0]}, prediction[0]"
1432+
f"{'':8}classifications = ['{target_values[0]}' if p[0]>p[1] else '{target_values[1]} for p in prediction]\n"
1433+
f"{'':8}return pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': [p[0] for p in prediction]}})\n"
13711434
)
13721435
"""
1373-
if prediction > 0.5:
1436+
if prediction[0] > prediction[1]:
13741437
classification_variable = "first_event"
13751438
else:
13761439
classification_variable = "second_event"
@@ -1379,7 +1442,11 @@ def _binary_target(
13791442
"""
13801443
# Return classification and probability value
13811444
elif sum(returns) == 1 and len(returns) == 2:
1382-
cls.score_code += f"{'':4}return prediction[0], prediction[1]"
1445+
cls.score_code += (
1446+
f"{'':4}if input_array.shape[0] == 1\n"
1447+
f"{'':8}return prediction[0], prediction[1]\n"
1448+
f"{'':4}else:\n"
1449+
f"{'':8}return pd.DataFrame(prediction, columns=[{','.join(metrics)}])")
13831450
"""
13841451
return prediction[0], prediction[1]
13851452
"""
@@ -1392,14 +1459,23 @@ def _binary_target(
13921459
# Determine which return is the classification value
13931460
class_index = [i for i, x in enumerate(returns) if x][0]
13941461
if class_index == 0:
1395-
cls.score_code += f"{'':4}return prediction[0], prediction[1]"
1462+
cls.score_code += (
1463+
f"{'':4}if input_array.shape[0] == 1\n"
1464+
f"{'':8}return prediction[0], prediction[1]\n"
1465+
f"{'':4}else:\n"
1466+
f"{'':8}output_table = pd.DataFrame(prediction, columns=[{','.join(metrics)},drop])\n"
1467+
f"{'':8}return output_table.drop('drop', axis=1)")
1468+
13961469
"""
13971470
return prediction[0], prediction[1]
13981471
"""
13991472
else:
14001473
cls.score_code += (
1401-
f"{'':4}return prediction[{class_index}], prediction[0]"
1402-
)
1474+
f"{'':4}if input_array.shape[0] == 1\n"
1475+
f"{'':8}return prediction[{class_index}], prediction[0]\n"
1476+
f"{'':4}else:\n"
1477+
f"{'':8}output_table = pd.DataFrame(prediction, columns=[{',drop,'.join(metrics[::-1])}])\n"
1478+
f"{'':8}return output_table.drop('drop', axis=1)")
14031479
"""
14041480
return prediction[2], prediction[0]
14051481
"""
@@ -1408,8 +1484,11 @@ def _binary_target(
14081484
elif len(metrics) == 3:
14091485
if h2o_model:
14101486
cls.score_code += (
1411-
f"{'':4}return prediction[1][0], float(prediction[1][1]), "
1412-
f"float(prediction[1][2])"
1487+
f"{'':4}if input_array.shape[0] == 1\n"
1488+
f"{'':8}return prediction[1][0], float(prediction[1][1]), "
1489+
f"float(prediction[1][2])\n"
1490+
f"{'':4}else:\n"
1491+
f"{'':8}return pd.DataFrame(prediction[1:], columns=[{','.join(metrics)}])"
14131492
)
14141493
"""
14151494
return prediction[1][0], float(prediction[1][1]), float(prediction[1][2])
@@ -1426,6 +1505,7 @@ def _binary_target(
14261505
f"{'':4}else:\n"
14271506
f"{'':8}{metrics[0]} = \"{target_values[1]}\"\n\n"
14281507
f"{'':4}return {metrics[0]}, prediction, 1 - prediction"
1508+
14291509
)
14301510
"""
14311511
if prediction > 0.5:

0 commit comments

Comments
 (0)