From 204fd856d7c9ffbc2147e0d856f155a185e1f5c9 Mon Sep 17 00:00:00 2001 From: djm21 Date: Fri, 13 Dec 2024 10:54:26 -0800 Subject: [PATCH 1/4] updates to score code + score code tests (EDMMMX-11741) --- src/sasctl/pzmm/write_score_code.py | 39 ++++++++++++++++++++++++++++- tests/unit/test_write_score_code.py | 18 ++++++++++--- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py index b29d1a1f..efd47220 100644 --- a/src/sasctl/pzmm/write_score_code.py +++ b/src/sasctl/pzmm/write_score_code.py @@ -497,7 +497,7 @@ def _write_imports( import codecs binary_string = "" -model = pickle.load(codecs.decode(binary_string.encode(), "base64")) +model = pickle.loads(codecs.decode(binary_string.encode(), "base64")) """ def _viya35_model_load( @@ -562,6 +562,26 @@ def _viya35_model_load( f'{model_id}/{model_file_name}")))' ) else: + if pickle_type.lower() == 'pickle': + self.score_code += ( + f'model_path = Path("/models/resources/viya/{model_id}' + f'")\nwith open(model_path / "{model_file_name}", ' + f"\"rb\") as pickle_model:\n{'':4}model = pd.read_pickle" + "(pickle_model)\n\n" + ) + """ +model_path = Path("/models/resources/viya/") +with open(model_path / "model.pickle", "rb") as pickle_model: + model = pd.read_pickle(pickle_model) + + """ + return ( + f"{'':8}model_path = Path(\"/models/resources/viya/{model_id}" + f"\")\n{'':8}with open(model_path / \"{model_file_name}\", " + f"\"rb\") as pickle_model:\n{'':12}model = pd.read_pickle" + "(pickle_model)" + ) + self.score_code += ( f'model_path = Path("/models/resources/viya/{model_id}' f'")\nwith open(model_path / "{model_file_name}", ' @@ -658,6 +678,23 @@ def _viya4_model_load( f"safe_mode=True)\n" ) else: + if pickle_type.lower() == "pickle": + self.score_code += ( + f"with open(Path(settings.pickle_path) / " + f'"{model_file_name}", "rb") as pickle_model:\n' + f"{'':4}model = pd.read_pickle(pickle_model)\n\n" + ) + """ + with open(Path(settings.pickle_path) / "model.pickle", "rb") as pickle_model: + model = pd.read_pickle(pickle_model) + + """ + return ( + f"{'':8}with open(Path(settings.pickle_path) / " + f'"{model_file_name}", "rb") as pickle_model:\n' + f"{'':12}model = pd.read_pickle(pickle_model)\n\n" + ) + self.score_code += ( f"with open(Path(settings.pickle_path) / " f'"{model_file_name}", "rb") as pickle_model:\n' diff --git a/tests/unit/test_write_score_code.py b/tests/unit/test_write_score_code.py index 25eb1aaf..f953b5cf 100644 --- a/tests/unit/test_write_score_code.py +++ b/tests/unit/test_write_score_code.py @@ -118,8 +118,13 @@ def test_viya35_model_load(): """ sc = ScoreCode() load_text = sc._viya35_model_load("1234", "normal") - assert "pickle.load(pickle_model)" in sc.score_code - assert "pickle.load(pickle_model)" in load_text + assert "pd.read_pickle(pickle_model)" in sc.score_code + assert "pd.read_pickle(pickle_model)" in load_text + + sc = ScoreCode() + load_text = sc._viya35_model_load("1234", "normal", pickle_type="dill") + assert "dill.load(pickle_model)" in sc.score_code + assert "dill.load(pickle_model)" in load_text sc = ScoreCode() mojo_text = sc._viya35_model_load("2345", "mojo", mojo_model=True) @@ -142,8 +147,13 @@ def test_viya4_model_load(): """ sc = ScoreCode() load_text = sc._viya4_model_load("normal") - assert "pickle.load(pickle_model)" in sc.score_code - assert "pickle.load(pickle_model)" in load_text + assert "pd.read_pickle(pickle_model)" in sc.score_code + assert "pd.read_pickle(pickle_model)" in load_text + + sc = ScoreCode() + load_text = sc._viya35_model_load("1234", "normal", pickle_type="dill") + assert "dill.load(pickle_model)" in sc.score_code + assert "dill.load(pickle_model)" in load_text sc = ScoreCode() mojo_text = sc._viya4_model_load("mojo", mojo_model=True) From e2bd4e5e1f5eca995d8018a61d40e06ff0c6830b Mon Sep 17 00:00:00 2001 From: djm21 Date: Fri, 13 Dec 2024 10:57:50 -0800 Subject: [PATCH 2/4] black reformatting (EDMMMX-11741) --- src/sasctl/pzmm/write_score_code.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py index efd47220..afe7e468 100644 --- a/src/sasctl/pzmm/write_score_code.py +++ b/src/sasctl/pzmm/write_score_code.py @@ -562,12 +562,12 @@ def _viya35_model_load( f'{model_id}/{model_file_name}")))' ) else: - if pickle_type.lower() == 'pickle': + if pickle_type.lower() == "pickle": self.score_code += ( - f'model_path = Path("/models/resources/viya/{model_id}' - f'")\nwith open(model_path / "{model_file_name}", ' - f"\"rb\") as pickle_model:\n{'':4}model = pd.read_pickle" - "(pickle_model)\n\n" + f'model_path = Path("/models/resources/viya/{model_id}' + f'")\nwith open(model_path / "{model_file_name}", ' + f"\"rb\") as pickle_model:\n{'':4}model = pd.read_pickle" + "(pickle_model)\n\n" ) """ model_path = Path("/models/resources/viya/") From 5306aee8105ca8ba187b20ab3cecea9b10fa763a Mon Sep 17 00:00:00 2001 From: djm21 Date: Mon, 23 Dec 2024 11:47:42 -0500 Subject: [PATCH 3/4] update model card example to create better pre-processing function --- .../pzmm_generate_complete_model_card.ipynb | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/examples/pzmm_generate_complete_model_card.ipynb b/examples/pzmm_generate_complete_model_card.ipynb index 7ebf0858..87fb8f63 100644 --- a/examples/pzmm_generate_complete_model_card.ipynb +++ b/examples/pzmm_generate_complete_model_card.ipynb @@ -568,7 +568,8 @@ " df.columns = df.columns.str.replace(' ', '')\n", " df.columns = df.columns.str.replace('-', '_')\n", " df = df.drop(['Sex_Male'], axis=1)\n", - " df = pd.concat([df, cat_vals], axis=1).drop('index', axis=1)\n", + " if 'index' in df.columns or 'index' in cat_vals.columns:\n", + " df = pd.concat([df, cat_vals], axis=1).drop('index', axis=1)\n", " # For the model to score correctly, all OHE columns must exist\n", " input_cols = [\n", " \"Education_9th\", \"Education_10th\", \"Education_11th\", \"Education_12th\", \"Education_Assoc_voc\", \"Education_Assoc_acdm\", \"Education_Masters\", \"Education_Prof_school\",\n", @@ -579,9 +580,20 @@ " 'Relationship_Not_in_family', 'Relationship_Own_child', 'Relationship_Unmarried', 'Relationship_Wife', 'Relationship_Other_relative', 'WorkClass_Private',\n", " 'Education_Bachelors'\n", " ]\n", + " # OHE columns must be removed after data combination\n", + " predictor_columns = ['Age', 'HoursPerWeek', 'WorkClass_Private', 'WorkClass_Self', 'WorkClass_Gov', \n", + " 'WorkClass_Other', 'Education_HS_grad', 'Education_Some_HS', 'Education_Assoc', 'Education_Some_college',\n", + " 'Education_Bachelors', 'Education_Adv_Degree', 'Education_No_HS', 'MartialStatus_Married_civ_spouse',\n", + " 'MartialStatus_Never_married', 'MartialStatus_Divorced', 'MartialStatus_Separated', 'MartialStatus_Widowed',\n", + " 'MartialStatus_Other', 'Relationship_Husband', 'Relationship_Not_in_family', 'Relationship_Own_child', 'Relationship_Unmarried',\n", + " 'Relationship_Wife', 'Relationship_Other_relative', 'Race_White', 'Race_Black', 'Race_Asian_Pac_Islander',\n", + " 'Race_Amer_Indian_Eskimo', 'Race_Other', 'Sex_Female']\n", + "\n", " for col in input_cols:\n", " if col not in df.columns:\n", " df[col] = 0\n", + " \n", + "\n", " df[\"Education_Some_HS\"] = df[\"Education_9th\"] | df[\"Education_10th\"] | df[\"Education_11th\"] | df[\"Education_12th\"]\n", " df[\"Education_Assoc\"] = df[\"Education_Assoc_voc\"] | df[\"Education_Assoc_acdm\"]\n", " df[\"Education_Adv_Degree\"] = df[\"Education_Masters\"] | df[\"Education_Prof_school\"] | df[\"Education_Doctorate\"]\n", @@ -593,6 +605,8 @@ "\n", " df[\"MartialStatus_Other\"] = df[\"MartialStatus_Married_spouse_absent\"] | df[\"MartialStatus_Married_AF_spouse\"]\n", "\n", + " df = df[predictor_columns]\n", + "\n", " return df" ] }, @@ -1772,7 +1786,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "pandatwo", "language": "python", "name": "python3" }, From 18c7c641c360af85c170f7ea2df25ffb8906444e Mon Sep 17 00:00:00 2001 From: djm21 Date: Mon, 23 Dec 2024 17:18:30 -0500 Subject: [PATCH 4/4] Update to model card example metadata --- examples/pzmm_generate_complete_model_card.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pzmm_generate_complete_model_card.ipynb b/examples/pzmm_generate_complete_model_card.ipynb index 87fb8f63..525958cf 100644 --- a/examples/pzmm_generate_complete_model_card.ipynb +++ b/examples/pzmm_generate_complete_model_card.ipynb @@ -1786,7 +1786,7 @@ ], "metadata": { "kernelspec": { - "display_name": "pandatwo", + "display_name": "Python 3", "language": "python", "name": "python3" },