
Commit a96c3ea

Updates to score code testing; executing snippets
1 parent 6f021de commit a96c3ea

File tree: 2 files changed, +197 −64 lines


src/sasctl/pzmm/write_score_code.py

Lines changed: 16 additions & 15 deletions
@@ -812,7 +812,7 @@ def _predict_method(
 f"{'':4}h2o_array = h2o.H2OFrame(input_array, "
 f"column_types=column_types)\n{'':4}prediction = "
 f"model.{method.__name__}(h2o_array)\n{'':4}prediction"
-f" = h2o.as_list(prediction, use_pandas=False)\n"
+f" = h2o.as_list(prediction, use_pandas=prediction.shape[0]>1)\n"
 )
 """
 input_array = pd.DataFrame(
@@ -821,7 +821,7 @@ def _predict_method(
 input_array = impute_missing_values(input_array)
 column_types = {"var1": "string", "var2": "numeric", "var3": "numeric"}
 h2o_array = h2o.H2OFrame(input_array, column_types=column_types)
-prediction = model.predict(h2o.array)
+prediction = model.predict(h2o_array)
 prediction = h2o.as_list(prediction, use_pandas=False)
 """
 # Statsmodels models
@@ -1167,30 +1167,29 @@ def _no_targets_no_thresholds(
 f"{'':4}if input_array.shape[0] == 1:\n"
 f"{'':8}{metrics} = prediction[1][0]\n{'':8}return {metrics}\n"
 f"{'':4}else:\n"
-f"{'':8}prediction = pd.Dataframe(prediction[1:], columns=prediction[0])\n"
-f"{'':8}return pd.DataFrame({{'{metrics}': prediction.iloc[:,0]}})"
+f"{'':8}return prediction.iloc[:, 0]\n"
 )
 """
 if input_array.shape[0] == 1:
 output_variable = prediction[1][0]
 return output_variable
 else:
-prediction = pd.Dataframe(prediction[1:], columns=prediction[0])
-return pd.DataFrame({'output_variable': prediction.iloc[:,0]})
+return prediction.iloc[:, 0]
 """
 else:
 cls.score_code += (
 f"{'':4}if input_array.shape[0] == 1:\n"
 f"{'':8}{metrics} = prediction[0]\n{'':8}return {metrics}\n"
 f"{'':4}else:\n"
-f"{'':8}output_table = pd.DataFrame('{metrics}': prediction)\n{'':8}return output_table"
+f"{'':8}output_table = pd.DataFrame({{'{metrics}': prediction}})\n"
+f"{'':8}return output_table"
 )
 """
 if input_array.shape[0] == 1:
 output_variable = prediction[0]
 return output_variable
 else:
-output_table = pd.DataFrame('output_variable': prediction)
+output_table = pd.DataFrame({'output_variable': prediction})
 return output_table
 """
 else:
@@ -1204,40 +1203,42 @@ def _no_targets_no_thresholds(
 f"{'':8}{metrics[i + 1]} = float(prediction[1][{i + 1}])\n"
 )
 cls.score_code += (
+f"{'':8}return {', '.join(metrics)}\n"
 f"{'':4}else:\n"
-f"{'':8}output_table = pd.DataFrame(prediction[1:], columns={metrics})\n"
+f"{'':8}output_table = prediction\n"
+f"{'':8}output_table.columns = {metrics}\n"
 f"{'':8}return output_table\n"
 )
 """
 if input_array.shape[0] == 1:
 classification_variable = prediction[1][0]
 prediction_variable_1 = float(prediction[1][1])
 prediction_variable_2 = float(prediction[1][2])
+return classification_variable, prediction_variable_1, prediction_variable_2
 else:
-output_table = pd.DataFrame(prediction[1:], columns=[classification_variable,variable_1,variable_2])
+output_table = pd.DataFrame(prediction[1:], columns=[classification_variable, prediction_variable_1, prediction_variable_2])
 return output_table
 """
 else:
 cls.score_code += f"{'':4}if input_array.shape[0] == 1:\n"
 for i in range(len(metrics)):
 cls.score_code += f"{'':8}{metrics[i]} = prediction[{i}]\n"
+cls.score_code += f"\n{'':8}return {', '.join(metrics)}\n"
 cls.score_code += (
 f"{'':4}else:\n"
-f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics})\n"
-f"{'':8}return output_table\n"
+f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics})"
+f"\n{'':8}return output_table\n"
 )
-cls.score_code += f"\n{'':4}return {', '.join(metrics)}"
 """
 if input_array.shape[0] == 1:
 classification_variable = prediction[0]
 prediction_variable_1 = prediction[1]
 prediction_variable_2 = prediction[2]
 else:
-output_table = pd.DataFrame(prediction, columns=["classification_variable","prediction_variable_1","prediction_variable_2"])
+output_table = pd.DataFrame(prediction, columns=["classification_variable", "prediction_variable_1", "prediction_variable_2"])
 return output_table
 """
 
-
 @classmethod
 def _binary_target(
 cls,
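
Taken together, these hunks change what the generated score code returns when more than one row is scored: a single-row call still returns scalar values, while a multi-row call now returns a labeled DataFrame (or, for H2O models, the prediction frame itself). Below is a rough, hand-written sketch of the kind of snippet the multi-metric, non-H2O branch produces; the function name, metric names, and sample inputs are illustrative only, not the exact output of write_score_code.py.

import pandas as pd

# Hand-written stand-in for the snippet emitted by _no_targets_no_thresholds
# (multi-metric, non-h2o branch). Names and inputs are illustrative only.
def test_snippet(input_array, prediction):
    if input_array.shape[0] == 1:
        # Single row: unpack the prediction list into scalar outputs
        Classification = prediction[0]
        Proba_A = prediction[1]
        Proba_B = prediction[2]
        return Classification, Proba_A, Proba_B
    else:
        # Multiple rows: wrap the predictions in a labeled output table
        output_table = pd.DataFrame(
            prediction, columns=["Classification", "Proba_A", "Proba_B"]
        )
        return output_table

single_row = pd.DataFrame([[1.0]], columns=["A"])
print(test_snippet(single_row, ["i", 0.3, 0.7]))                  # ('i', 0.3, 0.7)

multi_row = pd.DataFrame({"A": [1.0, 0.0]})
print(test_snippet(multi_row, [["i", 0.3, 0.7], ["j", 0.6, 0.4]]))  # labeled DataFrame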

tests/unit/test_write_score_code.py

Lines changed: 181 additions & 49 deletions
@@ -9,6 +9,7 @@
99
import pickle
1010
import random
1111
import tempfile
12+
import unittest
1213
from pathlib import Path
1314
from unittest.mock import DEFAULT, MagicMock, patch
1415

@@ -20,6 +21,7 @@
2021
from sasctl import current_session
2122
from sasctl.core import RestObj, VersionInfo
2223
from sasctl.pzmm.write_score_code import ScoreCode as sc
24+
from sasctl.pzmm.write_score_code import ScoreCode
2325

2426

2527
@pytest.fixture()
@@ -301,50 +303,174 @@ def test_determine_score_metrics():
301303
) == ["P_A", "P_B", "P_C"]
302304

303305

304-
def test_no_targets_no_thresholds():
305-
"""
306-
Test Cases:
307-
- len(metrics) == 1
308-
- non-h2o
309-
- h2o
310-
- len(metrics) > 1
311-
- non-h2o
312-
- h2o
313-
- raise error for invalid config (returns - metrics != 0)
314-
"""
315-
metrics = "Classification"
316-
returns = [1, "A"]
317-
with pytest.raises(ValueError):
318-
sc._no_targets_no_thresholds(metrics, returns)
306+
class TestNoTargetsNoThresholds(unittest.TestCase):
307+
def setUp(self):
308+
self.sc = ScoreCode
319309

320-
returns = [1]
321-
sc._no_targets_no_thresholds(metrics, returns)
322-
assert "Classification = prediction" in sc.score_code
323-
sc.score_code = ""
310+
def tearDown(self):
311+
self.sc.score_code = ""
324312

325-
sc._no_targets_no_thresholds(metrics, returns, h2o_model=True)
326-
assert "Classification = prediction[1][0]"
327-
sc.score_code = ""
313+
def execute_snippet(self, *args):
314+
scope = {}
315+
exec(self.sc.score_code, scope)
316+
test_snippet = scope["test_snippet"]
317+
return test_snippet(*args)
328318

329-
metrics = ["Classification", "Proba_A", "Proba_B", "Proba_C"]
330-
returns = ["I", 1, 2, 3]
331-
sc._no_targets_no_thresholds(metrics, returns)
332-
assert (
333-
sc.score_code == f"{'':4}Classification = prediction[0]\n"
334-
f"{'':4}Proba_A = prediction[1]\n"
335-
f"{'':4}Proba_B = prediction[2]\n"
336-
f"{'':4}Proba_C = prediction[3]\n\n"
337-
f"{'':4}return Classification, Proba_A, Proba_B, Proba_C"
338-
)
339-
sc.score_code = ""
340-
sc._no_targets_no_thresholds(metrics, returns, h2o_model=True)
341-
assert (
342-
sc.score_code == f"{'':4}Classification = prediction[1][0]\n"
343-
f"{'':4}Proba_A = float(prediction[1][1])\n"
344-
f"{'':4}Proba_B = float(prediction[1][2])\n"
345-
f"{'':4}Proba_C = float(prediction[1][3])\n\n"
346-
f"{'':4}return Classification, Proba_A, Proba_B, Proba_C"
347-
)
319+
def test_improper_arguments(self):
320+
metrics = "Classification"
321+
returns = [1, "A"]
322+
with pytest.raises(ValueError):
323+
self.sc._no_targets_no_thresholds(metrics, returns)
324+
325+
def test_single_metric(self):
326+
metrics = "Classification"
327+
returns = [1]
328+
self.sc.score_code += "import pandas as pd\n" \
329+
"def test_snippet(input_array, prediction):\n"
330+
self.sc._no_targets_no_thresholds(metrics, returns)
331+
# Single row
332+
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
333+
prediction = [.5]
334+
self.assertEqual(self.execute_snippet(input_array, prediction), .5)
335+
# Multi row
336+
input_array = pd.DataFrame({"A": [.9, 1, 1.1]})
337+
prediction = [.3, .4, .5]
338+
pd.testing.assert_frame_equal(
339+
self.execute_snippet(input_array, prediction),
340+
pd.DataFrame({metrics: prediction})
341+
)
342+
343+
def test_single_metric_h2o(self):
344+
metrics = "Classification"
345+
returns = [1]
346+
self.sc.score_code += "import pandas as pd\n" \
347+
"def test_snippet(input_array, prediction):\n"
348+
self.sc._no_targets_no_thresholds(metrics, returns, h2o_model=True)
349+
# Single row
350+
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
351+
prediction = [[], [.5]]
352+
self.assertEqual(self.execute_snippet(input_array, prediction), .5)
353+
# Multi row
354+
input_array = pd.DataFrame({"A": [.9, 1, 1.1]})
355+
prediction = pd.DataFrame({"predict": [0, 1, 1], "p0": [.3, .4, .5]})
356+
pd.testing.assert_series_equal(
357+
self.execute_snippet(input_array, prediction),
358+
pd.Series([0, 1, 1], name="predict")
359+
)
360+
361+
def test_multi_metric(self):
362+
metrics = ["Classification", "Proba_A", "Proba_B", "Proba_C"]
363+
returns = ["I", 1, 2, 3]
364+
self.sc.score_code += "import pandas as pd\n" \
365+
"def test_snippet(input_array, prediction):\n"
366+
self.sc._no_targets_no_thresholds(metrics, returns)
367+
# Single row
368+
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
369+
prediction = ["i", .3, .4, .5]
370+
self.assertEqual(
371+
self.execute_snippet(input_array, prediction),
372+
("i", .3, .4, .5)
373+
)
374+
# Multi row
375+
input_array = pd.DataFrame({"A": [1, 0, 1]})
376+
prediction = pd.DataFrame({
377+
"Classification": ["i", "j", "k"],
378+
"Proba_A": [.1, .2, .3],
379+
"Proba_B": [.4, .5, .6],
380+
"Proba_C": [.7, .8, .9]
381+
})
382+
pd.testing.assert_frame_equal(
383+
self.execute_snippet(input_array, prediction),
384+
prediction
385+
)
386+
387+
def test_multi_metric_h2o(self):
388+
metrics = ["Classification", "Proba_A", "Proba_B", "Proba_C"]
389+
returns = ["I", 1, 2, 3]
390+
self.sc.score_code += "import pandas as pd\n" \
391+
"def test_snippet(input_array, prediction):\n"
392+
self.sc._no_targets_no_thresholds(metrics, returns, h2o_model=True)
393+
# Single row
394+
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
395+
prediction = [[], ["i", .3, .4, .5]]
396+
self.assertEqual(
397+
self.execute_snippet(input_array, prediction),
398+
("i", .3, .4, .5)
399+
)
400+
# Multi row
401+
input_array = pd.DataFrame({"A": [1, 0, 1]})
402+
prediction = pd.DataFrame({
403+
"Classification": ["i", "j", "k"],
404+
"Proba_A": [.1, .2, .3],
405+
"Proba_B": [.4, .5, .6],
406+
"Proba_C": [.7, .8, .9]
407+
})
408+
pd.testing.assert_frame_equal(
409+
self.execute_snippet(input_array, prediction),
410+
prediction
411+
)
412+
413+
414+
class TestBinaryTarget(unittest.TestCase):
415+
def setUp(self):
416+
self.sc = ScoreCode
417+
self.target_values = ["A", "B"]
418+
419+
def tearDown(self):
420+
self.sc.score_code = ""
421+
422+
def execute_snippet(self, *args):
423+
scope = {}
424+
exec(self.sc.score_code, scope)
425+
test_snippet = scope["test_snippet"]
426+
return test_snippet(*args)
427+
428+
def test_improper_arguments(self):
429+
with pytest.raises(ValueError):
430+
sc._binary_target([], [], ["A", 1, 2, 3])
431+
with pytest.raises(ValueError):
432+
sc._binary_target([], [], ["A", "B"])
433+
with pytest.raises(ValueError):
434+
sc._binary_target(["A", "B", "C", "D"], [], [])
435+
436+
def test_one_metric_one_return(self):
437+
metrics = "Classification"
438+
returns = [""]
439+
self.sc.score_code += "import pandas as pd\n" \
440+
"def test_snippet(input_array, prediction):\n"
441+
self.sc._binary_target(metrics, self.target_values, returns)
442+
# Single row
443+
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
444+
prediction = .5
445+
self.assertEqual(self.execute_snippet(input_array, prediction), .5)
446+
# Multi row
447+
input_array = pd.DataFrame({"A": [.9, 1, 1.1]})
448+
prediction = [.3, .4, .5]
449+
pd.testing.assert_frame_equal(
450+
self.execute_snippet(input_array, prediction),
451+
pd.DataFrame({metrics: prediction})
452+
)
453+
454+
def test_one_metric_two_returns(self):
455+
pass
456+
457+
def test_one_metric_three_returns(self):
458+
pass
459+
460+
def test_two_metrics_one_return(self):
461+
pass
462+
463+
def test_two_metrics_two_returns(self):
464+
pass
465+
466+
def test_two_metrics_three_returns(self):
467+
pass
468+
469+
def test_three_metrics_one_return(self):
470+
pass
471+
472+
def test_three_metrics_three_returns(self):
473+
pass
348474

349475

350476
def test_binary_target():
@@ -398,14 +524,6 @@ def test_binary_target():
398524
- sum(returns) >= 2
399525
- len(metrics) > 3
400526
"""
401-
# Initial errors
402-
with pytest.raises(ValueError):
403-
sc._binary_target([], [], ["A", 1, 2, 3])
404-
with pytest.raises(ValueError):
405-
sc._binary_target([], [], ["A", "B"])
406-
with pytest.raises(ValueError):
407-
sc._binary_target(["A", "B", "C", "D"], [], [])
408-
409527
# # metrics == 1
410528
metrics = "Classification"
411529
sc._binary_target(metrics, ["A", "B"], [""], h2o_model=True)
@@ -504,6 +622,20 @@ def test_binary_target():
504622
sc._binary_target(metrics, ["A", "B"], ["1", 2, 3])
505623

506624

625+
class TestNonbinaryTargets(unittest.TestCase):
626+
def setUp(self):
627+
self.sc = ScoreCode
628+
629+
def tearDown(self):
630+
self.sc.score_code = ""
631+
632+
def execute_snippet(self, *args):
633+
scope = {}
634+
exec(self.sc.score_code, scope)
635+
test_snippet = scope["test_snippet"]
636+
return test_snippet(*args)
637+
638+
507639
def test_nonbinary_targets():
508640
"""
509641
Test Cases:
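
The new test classes above all rely on the same "execute the snippet" pattern: the test seeds ScoreCode.score_code with a small function header, lets the helper under test append its indented body, then execs the assembled source and calls the resulting function. Below is a self-contained sketch of that pattern; the snippet body is hand-written for illustration rather than generated by a sasctl helper.

import pandas as pd

# Assemble the source the way the tests do: a header line, then an indented
# body that a ScoreCode helper would normally append.
score_code = "import pandas as pd\n" \
             "def test_snippet(input_array, prediction):\n"
score_code += (
    f"{'':4}if input_array.shape[0] == 1:\n"
    f"{'':8}return prediction[0]\n"
    f"{'':4}else:\n"
    f"{'':8}return pd.DataFrame({{'Classification': prediction}})\n"
)

def execute_snippet(source, *args):
    # Same mechanism as the tests' execute_snippet helper: exec the generated
    # source into a scratch namespace and call the resulting function.
    scope = {}
    exec(source, scope)
    return scope["test_snippet"](*args)

single = execute_snippet(score_code, pd.DataFrame([[1]], columns=["A"]), [.5])
assert single == .5
table = execute_snippet(score_code, pd.DataFrame({"A": [1, 2]}), [.3, .7])
pd.testing.assert_frame_equal(table, pd.DataFrame({"Classification": [.3, .7]}))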
