21
21
# Package Imports
22
22
from sasctl .pzmm .write_score_code import ScoreCode as sc
23
23
from ..core import current_session
24
- from ..utils .decorators import deprecated
24
+ from ..utils .decorators import deprecated , experimental
25
25
from ..utils .misc import check_if_jupyter
26
26
27
27
try :
@@ -945,6 +945,20 @@ def assess_model_bias(
945
945
def format_max_differences (
946
946
maxdiff_dfs : List [DataFrame ], datarole : str = "TEST"
947
947
) -> DataFrame :
948
+ """
949
+ Converts a list of max differences DataFrames into a singular DataFrame
950
+ Parameters
951
+ ----------
952
+ maxdiff_dfs: List[DataFrame]
953
+ A list of max_differences DataFrames returned by CAS
954
+ datarole : string, optional
955
+ The data being used to assess bias (e.g. 'TEST', 'VALIDATION', etc.). Default is 'TEST'.
956
+
957
+ Returns
958
+ -------
959
+ DataFrame
960
+ A singular DataFrame containing all max differences data
961
+ """
948
962
maxdiff_df = pd .concat (maxdiff_dfs )
949
963
maxdiff_df = maxdiff_df .rename (
950
964
columns = {"Value" : "maxdiff" , "Base" : "BASE" , "Compare" : "COMPARE" }
@@ -965,6 +979,28 @@ def format_group_metrics(
965
979
pred_values : str = None ,
966
980
datarole : str = "TEST" ,
967
981
) -> DataFrame :
982
+ """
983
+ Converts list of group metrics DataFrames to a single DataFrame
984
+ Parameters
985
+ ----------
986
+ groupmetrics_dfs: List[DataFrame]
987
+ List of group metrics DataFrames generated by CASAction
988
+ pred_values : string, required for regression problems, otherwise not used
989
+ Variable name containing the predicted values in score_table. The variable name must follow SAS naming
990
+ conventions (no spaces and the name cannot begin with a number or symbol). Required for regression problems.
991
+ The default value is None.
992
+ prob_values : list of strings, required for classification problems, otherwise not used
993
+ A list of variable names containing the predicted probability values in the score table. The first element
994
+ should represent the predicted probability of the target class. Required for classification problems. Default
995
+ is None.
996
+ datarole : string, optional
997
+ The data being used to assess bias (e.g. 'TEST', 'VALIDATION', etc.). Default is 'TEST'.
998
+
999
+ Returns
1000
+ -------
1001
+ DataFrame
1002
+ A singular DataFrame containing formatted data for group metrics
1003
+ """
968
1004
# adding group metrics dataframes and adding values/ formatting
969
1005
groupmetrics_df = pd .concat (groupmetrics_dfs )
970
1006
groupmetrics_df = groupmetrics_df .rename (
@@ -1006,6 +1042,7 @@ def format_group_metrics(
1006
1042
return groupmetrics_df
1007
1043
1008
1044
# TODO: Add doc_string and unit tests
1045
+ @experimental
1009
1046
@classmethod
1010
1047
def bias_dataframes_to_json (
1011
1048
cls ,
@@ -1018,6 +1055,42 @@ def bias_dataframes_to_json(
1018
1055
pred_values : str = None ,
1019
1056
json_path : Union [str , Path , None ] = None ,
1020
1057
):
1058
+ """
1059
+ Formats data from the FairAITools CAS Action Set into a JSON-readable format
1060
+ Parameters
1061
+ ----------
1062
+ groupmetrics: DataFrame
1063
+ A DataFrame containing the group metrics data
1064
+ maxdifference: DataFrame
1065
+ A DataFrame containing the max difference data
1066
+ n_sensitivevariables: int
1067
+ The total number of sensitive variables
1068
+ actual_values : String
1069
+ Variable name containing the actual values in score_table. The variable name must follow SAS naming
1070
+ conventions (no spaces and the name cannot begin with a number or symbol).
1071
+ prob_values : list of strings, required for classification problems, otherwise not used
1072
+ A list of variable names containing the predicted probability values in the score table. The first element
1073
+ should represent the predicted probability of the target class. Required for classification problems. Default
1074
+ is None.
1075
+ levels: List of strings, required for classification problems, otherwise not used
1076
+ List of classes of a nominal target in the order they were passed in prob_values. Levels must be passed as a
1077
+ string. Default is None.
1078
+ pred_values : string, required for regression problems, otherwise not used
1079
+ Variable name containing the predicted values in score_table. The variable name must follow SAS naming
1080
+ conventions (no spaces and the name cannot begin with a number or symbol). Required for regression problems.
1081
+ The default value is None.
1082
+ json_path : str or Path, optional
1083
+ Location for the output JSON files. If a path is passed, the json files will populate in the directory and
1084
+ the function will return None, unless return_dataframes is True. Otherwise, the function will return the json
1085
+ strings in a dictionary (dict["maxDifferences.json"] and dict["groupMetrics.json"]). The default value is
1086
+ None.
1087
+
1088
+ Returns
1089
+ -------
1090
+ dict
1091
+ Dictionary of key-value pairs mapping each file name to its JSON
1092
+ string (as produced by json.dumps).
1093
+ """
1021
1094
folder = "reg_jsons" if prob_values is None else "clf_jsons"
1022
1095
1023
1096
dfs = (maxdifference , groupmetrics )
@@ -1061,20 +1134,20 @@ def bias_dataframes_to_json(
1061
1134
"type" : "num" ,
1062
1135
"values" : [prob_label ],
1063
1136
}
1064
- json_dict [1 ]["parameterMap" ] = cls .add_dict_key (
1065
- dict = json_dict [1 ]["parameterMap" ],
1066
- pos = i + 3 ,
1067
- new_key = prob_label ,
1068
- new_value = paramdict ,
1069
- )
1137
+ json_dict [1 ]["parameterMap" ][prob_label ] = paramdict
1138
+ # cls.add_dict_key(
1139
+ # dict=json_dict[1]["parameterMap"],
1140
+ # pos=i + 3,
1141
+ # new_key=prob_label,
1142
+ # new_value=paramdict,
1143
+ # )
1070
1144
1071
1145
else :
1072
1146
json_dict [1 ]["parameterMap" ]["predict" ]["label" ] = pred_values
1073
1147
json_dict [1 ]["parameterMap" ]["predict" ]["parameter" ] = pred_values
1074
1148
json_dict [1 ]["parameterMap" ]["predict" ]["values" ] = [pred_values ]
1075
- json_dict [1 ]["parameterMap" ] = cls .rename_dict_key (
1076
- json_dict [1 ]["parameterMap" ], pred_values , "predict"
1077
- )
1149
+ json_dict [1 ]["parameterMap" ][pred_values ] = json_dict [1 ]["parameterMap" ]["predict" ]
1150
+ del json_dict [1 ]["parameterMap" ]["predict" ]
1078
1151
1079
1152
if json_path :
1080
1153
for i , name in enumerate ([MAXDIFFERENCES , GROUPMETRICS ]):
@@ -1091,34 +1164,7 @@ def bias_dataframes_to_json(
1091
1164
GROUPMETRICS : json .dumps (json_dict [1 ], indent = 4 , cls = NpEncoder ),
1092
1165
}
1093
1166
1094
- # TODO: Add doc_string and unit tests
1095
- @staticmethod
1096
- def add_dict_key (
1097
- dict : dict , pos : int , new_key : Union [str , int , float , bool ], new_value
1098
- ):
1099
- result = {}
1100
- for i , k in enumerate (dict .keys ()):
1101
- if i == pos :
1102
- result [new_key ] = new_value
1103
- result [k ] = dict [k ]
1104
- else :
1105
- result [k ] = dict [k ]
1106
- return result
1107
1167
1108
- # TODO: Add doc_string and unit tests
1109
- @staticmethod
1110
- def rename_dict_key (
1111
- dict : dict ,
1112
- new_key : Union [str , int , float , bool ],
1113
- old_key : Union [str , int , float , bool ],
1114
- ) -> dict :
1115
- result = {}
1116
- for k , v in dict .items ():
1117
- if k == old_key :
1118
- result [new_key ] = v
1119
- else :
1120
- result .update ({k : v })
1121
- return result
1122
1168
1123
1169
@classmethod
1124
1170
def calculate_model_statistics (
0 commit comments