 9 |  9 | import os
10 | 10 | import pickle
11 | 11 | import random
   | 12 | +import shutil
12 | 13 | import sys
13 | 14 | import tempfile
14 | 15 | import unittest

19 | 20 | import numpy as np
20 | 21 | import pandas as pd
21 | 22 | import pytest
   | 23 | +from sklearn.model_selection import train_test_split
   | 24 | +from sklearn.tree import DecisionTreeClassifier
22 | 25 |
23 | 26 | import sasctl.pzmm as pzmm
24 | 27 | from sasctl.pzmm.write_json_files import JSONFiles as jf

@@ -628,23 +631,56 @@ def test_create_requirements_json(change_dir):
628 | 631 |
629 | 632 |     example_model = (Path.cwd() / "data/hmeqModels/DecisionTreeClassifier").resolve()
630 | 633 |     with tempfile.TemporaryDirectory() as tmp_dir:
631 |     | -        jf.create_requirements_json(example_model, Path(tmp_dir))
    | 634 | +        tmp_dir = Path(tmp_dir)
    | 635 | +        for item in example_model.iterdir():
    | 636 | +            if item.is_file() and item.name != "DecisionTreeClassifier.pickle":
    | 637 | +                shutil.copy(item, tmp_dir / tmp_dir.name)
    | 638 | +        data = pd.read_csv("data/hmeq.csv")
    | 639 | +        predictor_columns = [
    | 640 | +            "LOAN",
    | 641 | +            "MORTDUE",
    | 642 | +            "VALUE",
    | 643 | +            "YOJ",
    | 644 | +            "DEROG",
    | 645 | +            "DELINQ",
    | 646 | +            "CLAGE",
    | 647 | +            "NINQ",
    | 648 | +            "CLNO",
    | 649 | +            "DEBTINC",
    | 650 | +        ]
    | 651 | +        target_column = "BAD"
    | 652 | +        x = data[predictor_columns]
    | 653 | +        y = data[target_column]
    | 654 | +        x_train, x_test, y_train, y_test = train_test_split(
    | 655 | +            x, y, test_size=0.3, random_state=42
    | 656 | +        )
    | 657 | +        x_test.fillna(x_test.mean(), inplace=True)
    | 658 | +        x_train.fillna(x_train.mean(), inplace=True)
    | 659 | +        dtc = DecisionTreeClassifier(
    | 660 | +            max_depth=7, min_samples_split=2, min_samples_leaf=2, max_leaf_nodes=500
    | 661 | +        )
    | 662 | +        dtc = dtc.fit(x_train, y_train)
    | 663 | +        with open(tmp_dir / "DecisionTreeClassifier.pickle", "wb") as pkl_file:
    | 664 | +            pickle.dump(dtc, pkl_file)
    | 665 | +        jf.create_requirements_json(tmp_dir, Path(tmp_dir))
632 | 666 |         assert (Path(tmp_dir) / "requirements.json").exists()
633 | 667 |
634 |     | -    json_dict = jf.create_requirements_json(example_model)
635 |     | -    assert "requirements.json" in json_dict
636 |     | -    expected = [
637 |     | -        {"step": "install pandas", "command": f"pip install pandas=={pd.__version__}"},
638 |     | -        {"step": "install numpy", "command": f"pip install numpy=={np.__version__}"},
639 |     | -        {
640 |     | -            "step": "install sklearn",
641 |     | -            "command": f"pip install sklearn=={sk.__version__}",
642 |     | -        },
643 |     | -    ]
644 |     | -    unittest.TestCase.maxDiff = None
645 |     | -    unittest.TestCase().assertCountEqual(
646 |     | -        json.loads(json_dict["requirements.json"]), expected
647 |     | -    )
    | 668 | +        json_dict = jf.create_requirements_json(tmp_dir)
    | 669 | +        assert "requirements.json" in json_dict
    | 670 | +        expected = [
    | 671 | +            {
    | 672 | +                "step": "install numpy",
    | 673 | +                "command": f"pip install numpy=={np.__version__}",
    | 674 | +            },
    | 675 | +            {
    | 676 | +                "step": "install sklearn",
    | 677 | +                "command": f"pip install sklearn=={sk.__version__}",
    | 678 | +            },
    | 679 | +        ]
    | 680 | +        unittest.TestCase.maxDiff = None
    | 681 | +        unittest.TestCase().assertCountEqual(
    | 682 | +            json.loads(json_dict["requirements.json"]), expected
    | 683 | +        )
648 | 684 |
649 | 685 |
650 | 686 | class TestAssessBiasHelpers(unittest.TestCase):
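
For reference, a minimal standalone sketch of the two create_requirements_json call patterns the updated test exercises. Only the call signatures and the shape of the returned dict are taken from the test above; the model directory path and the sample output in the comments are illustrative assumptions, not verified library documentation.

import json
from pathlib import Path

from sasctl.pzmm.write_json_files import JSONFiles as jf

# Assumed layout: a directory holding a pickled scikit-learn model and its artifacts.
model_dir = Path("data/hmeqModels/DecisionTreeClassifier")

# Pattern 1 (first call in the test): write requirements.json into an output directory.
jf.create_requirements_json(model_dir, model_dir)
assert (model_dir / "requirements.json").exists()

# Pattern 2 (second call in the test): receive the content as a dict keyed by file
# name, where the value is a JSON string listing install steps.
json_dict = jf.create_requirements_json(model_dir)
for entry in json.loads(json_dict["requirements.json"]):
    # e.g. "install numpy -> pip install numpy==<installed version>"
    print(entry["step"], "->", entry["command"])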