
Commit ca5766d

Update pandas version requirement to pandas>1.2.1,<2.1 (#308)
2 parents 6fbe4e4 + fab05dc commit ca5766d

22 files changed (+90, -93 lines)

ads/dataset/helper.py

Lines changed: 25 additions & 27 deletions
@@ -314,7 +314,6 @@ def _get_dtype_from_error(e):
     error_string = str(e)
 
     if "mismatched dtypes" in error_string.lower():
-
         # For the mismatched dtypes error, dask either returns a error message containing the dtype argument
         # to specify, or the found and expected dtypes in a table format, depending on what stage
         # the type inferencing fails. The below logic supports building the dtype dictionary for both cases

@@ -732,8 +731,8 @@ def down_sample(df, target):
     """
     dfs = []
     target_value_counts = df[target].value_counts()
-    min_key = min(target_value_counts.iteritems(), key=lambda k: k[1])
-    for key, value in target_value_counts.iteritems():
+    min_key = min(target_value_counts.items(), key=lambda k: k[1])
+    for key, value in target_value_counts.items():
         if key != min_key[0]:
             dfs.append(
                 df[df[target] == key].sample(frac=1 - ((value - min_key[1]) / value))
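
The `iteritems()` calls swapped out above are the heart of this commit: `Series.iteritems()` was deprecated in pandas 1.5 and removed in pandas 2.0, while `Series.items()` yields the same `(label, value)` pairs on every supported version. A minimal sketch of the `down_sample` pattern, using a made-up toy target column rather than the dataset's real one:

    import pandas as pd

    # Hypothetical toy target column; the real code iterates df[target].value_counts().
    target = pd.Series(["a", "a", "a", "b", "b", "c"])
    value_counts = target.value_counts()

    # items() yields (class label, count) pairs on pandas 1.x and 2.x alike;
    # iteritems() raises AttributeError once pandas 2.0 removed it.
    min_key = min(value_counts.items(), key=lambda k: k[1])
    print(min_key)  # ('c', 1) -- the minority class and its count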
@@ -835,6 +834,7 @@ def _log_yscale_not_set():
         "`yscale` parameter is not set. Valid values are `'linear'`, `'log'`, `'symlog'`."
     )
 
+
 def infer_target_type(target, target_series, discover_target_type=True):
     # if type discovery is turned off, infer type from pandas dtype
     if discover_target_type:

@@ -845,13 +845,15 @@ def infer_target_type(target, target_series, discover_target_type=True):
         target_type = get_feature_type(target, target_series)
     return target_type
 
+
 def get_target_type(target, sampled_df, **init_kwargs):
     discover_target_type = init_kwargs.get("type_discovery", True)
     if target in init_kwargs.get("types", {}):
         sampled_df[target] = sampled_df[target].astype(init_kwargs.get("types")[target])
         discover_target_type = False
     return infer_target_type(target, sampled_df[target], discover_target_type)
 
+
 def get_dataset(
     df: pd.DataFrame,
     sampled_df: pd.DataFrame,

@@ -860,12 +862,12 @@ def get_dataset(
     shape: Tuple[int, int],
     positive_class=None,
     **init_kwargs,
-):
+):
     from ads.dataset.classification_dataset import (
-        BinaryClassificationDataset,
-        BinaryTextClassificationDataset,
-        MultiClassClassificationDataset,
-        MultiClassTextClassificationDataset
+        BinaryClassificationDataset,
+        BinaryTextClassificationDataset,
+        MultiClassClassificationDataset,
+        MultiClassTextClassificationDataset,
     )
     from ads.dataset.forecasting_dataset import ForecastingDataset
     from ads.dataset.regression_dataset import RegressionDataset

@@ -874,9 +876,7 @@ def get_dataset(
         logger.warning(
             "It is not recommended to use an empty column as the target variable."
         )
-        raise ValueError(
-            f"We do not support using empty columns as the chosen target"
-        )
+        raise ValueError(f"We do not support using empty columns as the chosen target")
     if utils.is_same_class(target_type, ContinuousTypedFeature):
         return RegressionDataset(
             df=df,

@@ -899,9 +899,9 @@ def get_dataset(
         )
 
     # Adding ordinal typed feature, but ultimately we should rethink how we want to model this type
-    elif utils.is_same_class(target_type, CategoricalTypedFeature) or utils.is_same_class(
-        target_type, OrdinalTypedFeature
-    ):
+    elif utils.is_same_class(
+        target_type, CategoricalTypedFeature
+    ) or utils.is_same_class(target_type, OrdinalTypedFeature):
         if target_type.meta_data["internal"]["unique"] == 2:
             if is_text_data(sampled_df, target):
                 return BinaryTextClassificationDataset(

@@ -946,17 +946,13 @@ def get_dataset(
         or "text" in target_type["type"]
         or "text" in target
     ):
-        raise ValueError(
-            f"The column {target} cannot be used as the target column."
-        )
+        raise ValueError(f"The column {target} cannot be used as the target column.")
     elif (
         utils.is_same_class(target_type, GISTypedFeature)
         or "coord" in target_type["type"]
         or "coord" in target
     ):
-        raise ValueError(
-            f"The column {target} cannot be used as the target column."
-        )
+        raise ValueError(f"The column {target} cannot be used as the target column.")
     # This is to catch constant columns that are boolean. Added as a fix for pd.isnull(), and datasets with a
     # binary target, but only data on one instance
     elif target_type["low_level_type"] == "bool":

@@ -974,6 +970,7 @@ def get_dataset(
             f"For example, types = {{{target}: 'category'}}"
         )
 
+
 def open(
     source,
     target=None,

@@ -1074,9 +1071,7 @@ def open(
         progress.update("Opening data")
         path = ElaboratedPath(source, format=format, **kwargs)
         reader_fn = (
-            get_format_reader(path=path, **kwargs)
-            if reader_fn is None
-            else reader_fn
+            get_format_reader(path=path, **kwargs) if reader_fn is None else reader_fn
         )
         df = load_dataset(path=path, reader_fn=reader_fn, **kwargs)
         name = path.name

@@ -1108,6 +1103,7 @@ def open(
         ),
     )
 
+
 def build_dataset(
     df: pd.DataFrame,
     shape: Tuple[int, int],

@@ -1149,9 +1145,7 @@ def build_dataset(
         discover_target_type = False
 
     # if type discovery is turned off, infer type from pandas dtype
-    target_type = infer_target_type(
-        target, sampled_df[target], discover_target_type
-    )
+    target_type = infer_target_type(target, sampled_df[target], discover_target_type)
 
     result = get_dataset(
         df=df,

@@ -1168,6 +1162,7 @@ def build_dataset(
     )
     return result
 
+
 class CustomFormatReaders:
     @staticmethod
     def read_tsv(path: str, **kwargs) -> pd.DataFrame:

@@ -1352,7 +1347,6 @@ def read_xml(path: str, **kwargs) -> pd.DataFrame:
         import xml.etree.cElementTree as et
 
         def get_children(df, node, parent, i):
-
             for name in node.attrib.keys():
                 df.at[i, parent + name] = node.attrib[name]
             for child in list(node):

@@ -1374,6 +1368,7 @@ def get_children(df, node, parent, i):
             last_i = i
         return ret_df
 
+
 reader_fns = {
     "csv": pd.read_csv,
     "tsv": CustomFormatReaders.read_tsv,

@@ -1399,13 +1394,15 @@ def get_children(df, node, parent, i):
     "xml": CustomFormatReaders.read_xml,
 }
 
+
 def validate_kwargs(func: Callable, kwargs):
     valid_params = inspect.signature(func).parameters
     if "kwargs" in valid_params:
         return kwargs
     else:
         return {k: v for k, v in kwargs.items() if k in valid_params}
 
+
 def get_format_reader(path: ElaboratedPath, **kwargs) -> Callable:
     format_key = path.format
     try:

@@ -1420,6 +1417,7 @@ def get_format_reader(path: ElaboratedPath, **kwargs) -> Callable:
 
     return reader_fn
 
+
 def load_dataset(path: ElaboratedPath, reader_fn: Callable, **kwargs) -> pd.DataFrame:
     dfs = []
     for filename in path.paths:

ads/dataset/recommendation_transformer.py

Lines changed: 3 additions & 4 deletions
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8; -*-
 
-# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 from __future__ import print_function, absolute_import

@@ -131,7 +131,6 @@ def _get_recommendations(self, df):
         self.feature_metadata_[self.target_] = self.target_type_
 
         for column in df.columns.values[df.isnull().any()]:
-
             # filter out columns that were discovered as constant or primary key columns in the previous step,
             # as they would get dropped before imputation
             if (

@@ -246,10 +245,10 @@ def _get_recommendations(self, df):
         if not self.is_balanced and self.fix_imbalance:
             target_value_counts = df[self.target_].value_counts()
             minority_class_len = min(
-                target_value_counts.iteritems(), key=lambda k: k[1]
+                target_value_counts.items(), key=lambda k: k[1]
             )[1]
             majority_class_len = max(
-                target_value_counts.iteritems(), key=lambda k: k[1]
+                target_value_counts.items(), key=lambda k: k[1]
             )[1]
             minor_majority_ratio = minority_class_len / majority_class_len

ads/evaluations/evaluation_plot.py

Lines changed: 4 additions & 5 deletions
@@ -447,7 +447,7 @@ def _lift_and_gain_chart(cls, ax, evaluation):
 
     @classmethod
     def _lift_chart(cls, ax, evaluation):
-        for mod_name, col in evaluation.iteritems():
+        for mod_name, col in evaluation.items():
             if col["y_score"] is not None:
                 ax.plot(
                     col["percentages"][1:],

@@ -476,7 +476,7 @@ def _lift_chart(cls, ax, evaluation):
 
     @classmethod
     def _gain_chart(cls, ax, evaluation):
-        for mod_name, col in evaluation.iteritems():
+        for mod_name, col in evaluation.items():
             if col["y_score"] is not None:
                 ax.plot(
                     col["percentages"],

@@ -517,7 +517,7 @@ def _pr_curve(cls, axs, evaluation):
             ax.axis("off")
             return
         if cls.prob_type == "_bin":
-            for mod_name, col in evaluation.iteritems():
+            for mod_name, col in evaluation.items():
                 if col["y_score"] is not None:
                     ax.plot(
                         col["recall_values"],

@@ -589,7 +589,7 @@ def _roc_curve(cls, axs, evaluation):
             ax.axis("off")
             return
         if cls.prob_type == "_bin":
-            for mod_name, col in evaluation.iteritems():
+            for mod_name, col in evaluation.items():
                 if col["y_score"] is not None:
                     ax.plot(
                         col["false_positive_rate"],

@@ -803,7 +803,6 @@ def _pretty_scatter(
         label=None,
         plot_kwargs=None,
     ):
-
         if plot_kwargs is None:
             plot_kwargs = {}
         ax.scatter(x, y, s=s, label=label, marker="o", alpha=alpha, **plot_kwargs)
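
In these plotting helpers `evaluation` is iterated column by column, so this is the DataFrame flavour of the same migration: `DataFrame.iteritems()` is gone in pandas 2.0, and `DataFrame.items()` yields `(column_label, Series)` pairs on both 1.x and 2.x. A small illustration with a hypothetical stand-in for the evaluation frame (one column per model):

    import pandas as pd

    # Assumed shape only: one column per model, rows keyed by the artefacts
    # the plotting code looks up (y_score, percentages, ...).
    evaluation = pd.DataFrame(
        {"model_a": {"y_score": [0.9, 0.1]}, "model_b": {"y_score": None}}
    )

    # DataFrame.items() replaces the removed iteritems(): each iteration
    # yields the column label and that column as a Series.
    for mod_name, col in evaluation.items():
        print(mod_name, col["y_score"])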

ads/feature_engineering/accessor/dataframe_accessor.py

Lines changed: 1 addition & 1 deletion
@@ -218,7 +218,7 @@ def feature_type_description(self) -> pd.DataFrame:
         for col in self._obj:
             series_feature_type_df = self._obj[col].ads.feature_type_description
             series_feature_type_df.insert(0, "Column", col)
-            result_df = result_df.append(series_feature_type_df)
+            result_df = pd.concat([result_df, series_feature_type_df])
         result_df.reset_index(drop=True, inplace=True)
         return result_df
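
The other recurring change in this commit: `DataFrame.append()` was deprecated in pandas 1.4 and removed in 2.0, so the accessors now grow their result frames with `pd.concat()`. A minimal sketch of the pattern with made-up per-column description frames (the commit concatenates inside the loop; collecting the pieces and concatenating once, as below, is an equivalent variant that also avoids repeated copying):

    import pandas as pd

    # Hypothetical per-column frames; the accessor builds one per column
    # from each Series' feature_type_description.
    chunks = [
        pd.DataFrame({"Column": ["age"], "Feature Type": ["integer"]}),
        pd.DataFrame({"Column": ["name"], "Feature Type": ["string"]}),
    ]

    # pandas < 2.0:  result_df = result_df.append(chunk)  # removed in 2.0
    result_df = pd.concat(chunks)
    result_df.reset_index(drop=True, inplace=True)
    print(result_df)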

ads/feature_engineering/accessor/mixin/correlation.py

Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 from __future__ import print_function, absolute_import

@@ -68,7 +68,7 @@ def _list_to_dataframe(
     correlation_matrix = correlation_matrix.loc[:, correlation_matrix.index]
     if normal_form:
         data = []
-        for (col1, col2), corr in correlation_matrix.stack().iteritems():
+        for (col1, col2), corr in correlation_matrix.stack().items():
             data.append([col1, col2, round(corr, 4)])
         return pd.DataFrame(data, columns=["Column 1", "Column 2", "Value"])
     else:

@@ -161,6 +161,6 @@ def cont_vs_cont(df: pd.DataFrame, normal_form: bool = True) -> pd.DataFrame:
     if not normal_form:
         return df.corr(method="pearson")
     data = []
-    for (col1, col2), corr in df.corr(method="pearson").stack().iteritems():
+    for (col1, col2), corr in df.corr(method="pearson").stack().items():
         data.append([col1, col2, round(corr, 4)])
     return pd.DataFrame(data, columns=["Column 1", "Column 2", "Value"])
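
Same migration applied to a stacked correlation matrix: `corr().stack()` produces a Series with a two-level index of column pairs, and iterating it with `items()` instead of the removed `iteritems()` yields `((col1, col2), value)` tuples. A self-contained sketch with arbitrary numeric data:

    import pandas as pd

    df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [2, 4, 6, 8], "z": [4, 3, 2, 1]})

    data = []
    # stack() flattens the correlation matrix into a MultiIndex Series;
    # items() (formerly iteritems()) yields ((col1, col2), corr) pairs.
    for (col1, col2), corr in df.corr(method="pearson").stack().items():
        data.append([col1, col2, round(corr, 4)])

    print(pd.DataFrame(data, columns=["Column 1", "Column 2", "Value"]))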

ads/feature_engineering/accessor/mixin/eda_mixin.py

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 """

@@ -262,5 +262,5 @@ def warning(self) -> pd.DataFrame:
             warning_df = self._obj[col].ads.warning()
             if warning_df is not None:
                 warning_df.insert(0, "Column", col)
-                result_df = result_df.append(warning_df)
+                result_df = pd.concat([result_df, warning_df])
         return result_df.reset_index(drop=True)

ads/feature_engineering/accessor/mixin/eda_mixin_series.py

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 """

@@ -80,6 +80,6 @@ def warning(self) -> pd.DataFrame:
             warning_df = feature_type.warning(self._obj)
             if warning_df is not None:
                 warning_df.insert(0, "Feature Type", feature_type.name)
-                result_df = result_df.append(warning_df)
+                result_df = pd.concat([result_df, warning_df])
         result_df.reset_index(drop=True, inplace=True)
         return result_df

ads/feature_engineering/accessor/mixin/feature_types_mixin.py

Lines changed: 5 additions & 5 deletions
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 """

@@ -115,14 +115,14 @@ def warning_registered(self) -> pd.DataFrame:
             for col in self._obj.columns:
                 feature_type_df = self._obj[col].ads.warning_registered()
                 feature_type_df.insert(0, "Column", col)
-                result_df = result_df.append(feature_type_df)
+                result_df = pd.concat([result_df, feature_type_df])
         else:
             result_df = pd.DataFrame((), columns=common_columns)
             for feature_type in self._feature_type:
                 feature_type_df = feature_type.warning.registered()
                 feature_type_df.insert(0, "Feature Type", feature_type.name)
                 feature_type_df = feature_type_df.rename(columns={"Name": "Warning"})
-                result_df = result_df.append(feature_type_df)
+                result_df = pd.concat([result_df, feature_type_df])
         result_df.reset_index(drop=True, inplace=True)
         return result_df

@@ -155,14 +155,14 @@ def validator_registered(self) -> pd.DataFrame:
             for col in self._obj.columns:
                 feature_type_df = self._obj[col].ads.validator_registered()
                 feature_type_df.insert(0, "Column", col)
-                result_df = result_df.append(feature_type_df)
+                result_df = pd.concat([result_df, feature_type_df])
         else:
             result_df = pd.DataFrame((), columns=common_columns)
             for feature_type in self._feature_type:
                 feature_type_df = feature_type.validator.registered()
                 feature_type_df.insert(0, "Feature Type", feature_type.name)
                 feature_type_df = feature_type_df.rename(columns={"Name": "Validator"})
-                result_df = result_df.append(feature_type_df)
+                result_df = pd.concat([result_df, feature_type_df])
         result_df.reset_index(drop=True, inplace=True)
         return result_df

ads/feature_engineering/feature_type/creditcard.py

Lines changed: 2 additions & 1 deletion
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 """

@@ -198,6 +198,7 @@ def feature_stat(x: pd.Series):
         df_stat = _count_unique_missing(x)
         card_types = x.apply(assign_issuer)
         value_counts = card_types.value_counts()
+        value_counts.rename("creditcard", inplace=True)
         value_counts.index = [
             "count_" + cardtype for cardtype in list(value_counts.index)
         ]
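
The added `rename` guards against a behaviour change in pandas 2.0: `Series.value_counts()` now returns a Series named "count" (the original name moves to the index) rather than one carrying the column's own name, so pinning the name explicitly keeps the downstream stats frame labelled the same way on 1.x and 2.x. A small sketch of the effect, under that assumption:

    import pandas as pd

    s = pd.Series(["visa", "visa", "amex"], name="creditcard")

    counts = s.value_counts()
    # pandas < 2.0 names this result after the original series ("creditcard");
    # pandas >= 2.0 names it "count" and moves the original name to the index.
    counts.rename("creditcard", inplace=True)  # same label on either version

    counts.index = ["count_" + card_type for card_type in counts.index]
    print(counts)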
