
Commit 1262201

Merge pull request #210 from sassoftware/assessment_fix
2 parents 9d3bcab + 294ed96 commit 1262201

File tree

3 files changed: +19 -23 lines


examples/pzmm_binary_classification_model_import.ipynb

Lines changed: 2 additions & 3 deletions
@@ -717,7 +717,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 17,
+    "execution_count": null,
     "metadata": {
      "Collapsed": "false"
     },
@@ -751,8 +751,7 @@
    " \n",
    " # Calculate the model statistics, ROC chart, and Lift chart; then write to json files\n",
    " pzmm.JSONFiles.calculate_model_statistics(\n",
-    "     target_value=1, \n",
-    "     prob_value=0.5, \n",
+    "     target_value=1,\n",
    "     train_data=train_data, \n",
    "     test_data=test_data, \n",
    "     json_path=path\n",

examples/pzmm_generate_complete_model_card.ipynb

Lines changed: 2 additions & 3 deletions
@@ -874,8 +874,7 @@
    "source": [
     "# Step 10: Write model statistics files\n",
     "pzmm.JSONFiles.calculate_model_statistics(\n",
-    "    target_value=1, \n",
-    "    prob_value=0.5, \n",
+    "    target_value=1,\n",
     "    train_data=train_scored[[target, ti, t1]], \n",
     "    test_data=test_scored[[target, ti, t1]],\n",
     "    validate_data=test_scored[[target, ti, t1]],\n",
@@ -1786,7 +1785,7 @@
    ],
    "metadata": {
     "kernelspec": {
-     "display_name": "Python 3",
+     "display_name": ".venv",
     "language": "python",
     "name": "python3"
    },

src/sasctl/pzmm/write_json_files.py

Lines changed: 15 additions & 17 deletions
@@ -165,7 +165,7 @@ def write_var_json(

     @staticmethod
     def generate_variable_properties(
-        input_data: Union[DataFrame, Series]
+        input_data: Union[DataFrame, Series],
     ) -> List[dict]:
         """
         Generate a list of dictionaries of variable properties given an input dataframe.
@@ -1192,7 +1192,6 @@ def bias_dataframes_to_json(
     def calculate_model_statistics(
         cls,
         target_value: Union[str, int, float],
-        prob_value: Union[int, float, None] = None,
         validate_data: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None,
         train_data: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None,
         test_data: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None,
@@ -1211,8 +1210,7 @@ def calculate_model_statistics(
         Datasets must contain the actual and predicted values and may optionally contain
         the predicted probabilities. If no probabilities are provided, a dummy
         probability dataset is generated based on the predicted values and normalized by
-        the target value. If a probability threshold value is not provided, the
-        threshold value is set at 0.5.
+        the target value.

         Datasets can be provided in the following forms, with the assumption that data
         is ordered as `actual`, `predict`, and `probability` respectively:
@@ -1229,9 +1227,6 @@ def calculate_model_statistics(
         ----------
         target_value : str, int, or float
             Target event value for model prediction events.
-        prob_value : int or float, optional
-            The threshold value for model predictions to indicate an event occurred. The
-            default value is 0.5.
         validate_data : pandas.DataFrame, list of list, or numpy.ndarray, optional
             Dataset pertaining to the validation data. The default value is None.
         train_data : pandas.DataFrame, list of list, or numpy.ndarray, optional
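
To illustrate the docstring above, a hypothetical input dataset in the expected actual/predict/probability order could look like the following; the column names are illustrative only, since data may also be passed as a list of lists or a numpy array:

    import pandas as pd

    # Hypothetical dataset ordered as actual, predict, probability (column names illustrative).
    validate_data = pd.DataFrame(
        {
            "actual": [1, 0, 1, 0],                     # observed target values
            "predict": [1, 0, 0, 0],                    # predicted target values
            "predict_proba": [0.83, 0.12, 0.46, 0.05],  # optional predicted probabilities
        }
    )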
@@ -1284,30 +1279,33 @@ def calculate_model_statistics(
                 continue

             data = cls.stat_dataset_to_dataframe(data, target_value, target_type)
+            data["predict_proba2"] = 1 - data["predict_proba"]

             conn.upload(
                 data,
-                casout={"name": "assess_dataset", "replace": True, "caslib": "Public"},
+                casout={"caslib": "Public", "name": "assess_dataset", "replace": True},
             )
+
             if target_type == "classification":
                 conn.percentile.assess(
                     table={"name": "assess_dataset", "caslib": "Public"},
-                    response="predict",
-                    pVar="predict_proba",
-                    event=str(target_value),
-                    pEvent=str(prob_value) if prob_value else str(0.5),
-                    inputs="actual",
+                    inputs="predict_proba",
+                    response="actual",
+                    event="1",
+                    pvar="predict_proba2",
+                    pevent="0",
+                    includeLift=True,
                     fitStatOut={"name": "FitStat", "replace": True, "caslib": "Public"},
                     rocOut={"name": "ROC", "replace": True, "caslib": "Public"},
                     casout={"name": "Lift", "replace": True, "caslib": "Public"},
                 )
             else:
                 conn.percentile.assess(
                     table={"name": "assess_dataset", "caslib": "Public"},
-                    response="predict",
-                    inputs="actual",
-                    fitStatOut={"name": "FitStat", "replace": True, "caslib": "Public"},
-                    casout={"name": "Lift", "replace": True, "caslib": "Public"},
+                    response="actual",
+                    inputs="predict",
+                    fitStatOut={"caslib": "Public", "name": "FitStat", "replace": True},
+                    casout={"caslib": "Public", "name": "Lift", "replace": True},
                 )

             fitstat_dict = (
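
A minimal sketch of the reworked classification assessment, assuming an active swat CAS session conn and a DataFrame data already shaped by stat_dataset_to_dataframe (that is, with actual, predict, and predict_proba columns), mirroring the diff above:

    # Sketch only: conn is an active swat.CAS session; data has "actual", "predict",
    # and "predict_proba" columns as produced by stat_dataset_to_dataframe.
    data["predict_proba2"] = 1 - data["predict_proba"]  # complement probability for the non-event level

    conn.upload(
        data,
        casout={"caslib": "Public", "name": "assess_dataset", "replace": True},
    )

    conn.percentile.assess(
        table={"name": "assess_dataset", "caslib": "Public"},
        inputs="predict_proba",   # assess the event probability directly
        response="actual",        # observed target values as the response
        event="1",
        pvar="predict_proba2",
        pevent="0",
        includeLift=True,
        fitStatOut={"name": "FitStat", "replace": True, "caslib": "Public"},
        rocOut={"name": "ROC", "replace": True, "caslib": "Public"},
        casout={"name": "Lift", "replace": True, "caslib": "Public"},
    )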
