diff --git a/examples/pzmm_binary_classification_model_import.ipynb b/examples/pzmm_binary_classification_model_import.ipynb
index a2bc57c3..940787ab 100644
--- a/examples/pzmm_binary_classification_model_import.ipynb
+++ b/examples/pzmm_binary_classification_model_import.ipynb
@@ -717,7 +717,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {
     "Collapsed": "false"
    },
@@ -751,8 +751,7 @@
    " \n",
    "    # Calculate the model statistics, ROC chart, and Lift chart; then write to json files\n",
    "    pzmm.JSONFiles.calculate_model_statistics(\n",
-   "        target_value=1, \n",
-   "        prob_value=0.5, \n",
+   "        target_value=1,\n",
    "        train_data=train_data, \n",
    "        test_data=test_data, \n",
    "        json_path=path\n",
diff --git a/examples/pzmm_generate_complete_model_card.ipynb b/examples/pzmm_generate_complete_model_card.ipynb
index 525958cf..3a68271b 100644
--- a/examples/pzmm_generate_complete_model_card.ipynb
+++ b/examples/pzmm_generate_complete_model_card.ipynb
@@ -874,8 +874,7 @@
    "source": [
     "# Step 10: Write model statistics files\n",
     "pzmm.JSONFiles.calculate_model_statistics(\n",
-    "    target_value=1, \n",
-    "    prob_value=0.5, \n",
+    "    target_value=1,\n",
     "    train_data=train_scored[[target, ti, t1]], \n",
     "    test_data=test_scored[[target, ti, t1]],\n",
     "    validate_data=test_scored[[target, ti, t1]],\n",
@@ -1786,7 +1785,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": ".venv",
    "language": "python",
    "name": "python3"
   },
diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py
index 6e751781..1c0c560d 100644
--- a/src/sasctl/pzmm/write_json_files.py
+++ b/src/sasctl/pzmm/write_json_files.py
@@ -165,7 +165,7 @@ def write_var_json(
 
     @staticmethod
     def generate_variable_properties(
-        input_data: Union[DataFrame, Series]
+        input_data: Union[DataFrame, Series],
     ) -> List[dict]:
         """
         Generate a list of dictionaries of variable properties given an input dataframe.
@@ -1192,7 +1192,6 @@ def bias_dataframes_to_json(
     def calculate_model_statistics(
         cls,
         target_value: Union[str, int, float],
-        prob_value: Union[int, float, None] = None,
         validate_data: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None,
         train_data: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None,
         test_data: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None,
@@ -1211,8 +1210,7 @@
         Datasets must contain the actual and predicted values and may optionally
         contain the predicted probabilities. If no probabilities are provided, a dummy
         probability dataset is generated based on the predicted values and normalized by
-        the target value. If a probability threshold value is not provided, the
-        threshold value is set at 0.5.
+        the target value.
 
         Datasets can be provided in the following forms, with the assumption that data
         is ordered as `actual`, `predict`, and `probability` respectively:
@@ -1229,9 +1227,6 @@
         ----------
         target_value : str, int, or float
             Target event value for model prediction events.
-        prob_value : int or float, optional
-            The threshold value for model predictions to indicate an event occurred. The
-            default value is 0.5.
         validate_data : pandas.DataFrame, list of list, or numpy.ndarray, optional
             Dataset pertaining to the validation data. The default value is None.
         train_data : pandas.DataFrame, list of list, or numpy.ndarray, optional
@@ -1284,19 +1279,22 @@
                 continue
 
             data = cls.stat_dataset_to_dataframe(data, target_value, target_type)
+            data["predict_proba2"] = 1 - data["predict_proba"]
 
             conn.upload(
                 data,
-                casout={"name": "assess_dataset", "replace": True, "caslib": "Public"},
+                casout={"caslib": "Public", "name": "assess_dataset", "replace": True},
             )
+
             if target_type == "classification":
                 conn.percentile.assess(
                     table={"name": "assess_dataset", "caslib": "Public"},
-                    response="predict",
-                    pVar="predict_proba",
-                    event=str(target_value),
-                    pEvent=str(prob_value) if prob_value else str(0.5),
-                    inputs="actual",
+                    inputs="predict_proba",
+                    response="actual",
+                    event="1",
+                    pvar="predict_proba2",
+                    pevent="0",
+                    includeLift=True,
                     fitStatOut={"name": "FitStat", "replace": True, "caslib": "Public"},
                     rocOut={"name": "ROC", "replace": True, "caslib": "Public"},
                     casout={"name": "Lift", "replace": True, "caslib": "Public"},
@@ -1304,10 +1302,10 @@
             else:
                 conn.percentile.assess(
                     table={"name": "assess_dataset", "caslib": "Public"},
-                    response="predict",
-                    inputs="actual",
-                    fitStatOut={"name": "FitStat", "replace": True, "caslib": "Public"},
-                    casout={"name": "Lift", "replace": True, "caslib": "Public"},
+                    response="actual",
+                    inputs="predict",
+                    fitStatOut={"caslib": "Public", "name": "FitStat", "replace": True},
+                    casout={"caslib": "Public", "name": "Lift", "replace": True},
                 )
 
         fitstat_dict = (
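
A minimal usage sketch of the updated call for anyone reviewing this change, mirroring the notebook cells above. The scored dataframe, its column names, the json_path value, and the connection details are hypothetical stand-ins; an authenticated SAS Viya session is assumed, since calculate_model_statistics uploads each dataset to CAS for assessment.

    import pandas as pd

    from sasctl import Session
    import sasctl.pzmm as pzmm

    # Hypothetical scored data, ordered actual, predict, probability as the
    # docstring requires.
    train_scored = pd.DataFrame(
        {
            "actual": [1, 0, 1, 0, 1],
            "predict": [1, 0, 0, 0, 1],
            "predict_proba": [0.91, 0.18, 0.42, 0.07, 0.88],
        }
    )

    # Placeholder host and credentials.
    with Session("example.sas.com", "username", "password"):
        # prob_value is no longer accepted; only the target event value is passed.
        pzmm.JSONFiles.calculate_model_statistics(
            target_value=1,
            train_data=train_scored,
            test_data=train_scored,
            json_path="model_files",
        )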