Loosen scikit-learn version (#297)

mingkang111 · web-flow · commit 1990929bdde8 · 2023-08-25T13:06:25.000-07:00
diff --git a/ads/hpo/ads_search_space.py b/ads/hpo/ads_search_space.py
@@ -121,7 +121,7 @@ def __init__(self, strategy):
     def suggest_space(self, **kwargs):
         space = {
             "alpha": LogUniformDistribution(10**-4, 10**-1),
-            "penalty": CategoricalDistribution(["l1", "l2", "none"]),
+            "penalty": CategoricalDistribution(["l1", "l2", None]),
         }
         if self.strategy != "perfunctory":
             space.update(
@@ -144,7 +144,6 @@ def __init__(self, strategy):
         super(SVCSearchSpace, self).__init__(strategy)
 
     def suggest_space(self, **kwargs):
-
         space = {
             "C": LogUniformDistribution(10**-4, 10**-1),
             "max_iter": CategoricalDistribution([1000]),
@@ -172,7 +171,6 @@ def __init__(self, strategy):
         super(LinearSVCSearchSpace, self).__init__(strategy)
 
     def suggest_space(self, **kwargs):
-
         space = {
             "C": LogUniformDistribution(10**-4, 10**-1),
             "dual": CategoricalDistribution([False]),
@@ -197,7 +195,6 @@ def __init__(self, strategy):
         super(LinearSVRSearchSpace, self).__init__(strategy)
 
     def suggest_space(self, **kwargs):
-
         space = {"C": LogUniformDistribution(10**-4, 10**-1)}
 
         if self.strategy != "perfunctory":
@@ -217,7 +214,6 @@ def __init__(self, strategy):
         super(DecisionTreeClassifierSearchSpace, self).__init__(strategy)
 
     def suggest_space(self, **kwargs):
-
         space = {
             "max_depth": IntUniformDistribution(1, 5),
             "min_impurity_decrease": UniformDistribution(0, 0.05),
@@ -241,7 +237,6 @@ def __init__(self, strategy):
         super(DecisionTreeRegressorSearchSpace, self).__init__(strategy)
 
     def suggest_space(self, **kwargs):
-
         space = {
             "max_depth": IntUniformDistribution(1, 5),
             "min_impurity_decrease": UniformDistribution(0, 0.05),
@@ -252,7 +247,11 @@ def suggest_space(self, **kwargs):
             space.update(
                 {
                     "criterion": CategoricalDistribution(
-                        ["mse", "friedman_mse", "mae"]
+                        [
+                            "squared_error",
+                            "friedman_mse",
+                            "absolute_error",
+                        ]
                     ),
                     "min_samples_leaf": IntUniformDistribution(2, 500),
                 }
@@ -335,7 +334,6 @@ def __init__(self, strategy):
         super(ExtraTreesClassifierSearchSpace, self).__init__(strategy)
 
     def suggest_space(self, **kwargs):
-
         space = {
             "n_estimators": IntUniformDistribution(50, 250),
             "max_depth": IntUniformDistribution(1, 5),
@@ -374,7 +372,6 @@ def __init__(self, strategy):
         super(GradientBoostingRegressorSearchSpace, self).__init__(strategy)
 
     def suggest_space(self, **kwargs):
-
         space = {
             "max_depth": IntUniformDistribution(1, 5),
             "max_features": CategoricalDistribution(["sqrt", "log2"]),
diff --git a/ads/hpo/objective.py b/ads/hpo/objective.py
@@ -184,7 +184,6 @@ def _cross_validate_with_pruning(
 
         for step in range(self.max_iter):
             for i, (train, test) in enumerate(self.cv.split(X, y, groups=self.groups)):
-
                 out = self._partial_fit_and_score(
                     X, y, estimators[i], train, test, partial_fit_params
                 )
@@ -201,7 +200,6 @@ def _cross_validate_with_pruning(
             trial.report(intermediate_value, step=step)
 
             if trial.should_prune():
-
                 self._store_scores(trial, scores, self.scoring_name)
 
                 raise optuna.TrialPruned(f"trial was pruned at iteration {step}.")
diff --git a/ads/hpo/search_cv.py b/ads/hpo/search_cv.py
@@ -67,7 +67,7 @@ class State(Enum):
     COMPLETED = auto()
 
 
-class InvalidStateTransition(Exception):   # pragma: no cover
+class InvalidStateTransition(Exception):  # pragma: no cover
     """
     `Invalid State Transition` is raised when an invalid transition request is made, such as calling
     halt without a running process.
@@ -76,7 +76,7 @@ class InvalidStateTransition(Exception):   # pragma: no cover
     pass
 
 
-class ExitCriterionError(Exception):   # pragma: no cover
+class ExitCriterionError(Exception):  # pragma: no cover
     """
     `ExitCriterionError` is raised when an attempt is made to check exit status for a different exit
     type than the tuner was initialized with. For example, if an HPO study has an exit criteria based
@@ -87,14 +87,14 @@ class ExitCriterionError(Exception):   # pragma: no cover
     pass
 
 
-class DuplicatedStudyError(Exception):   # pragma: no cover
+class DuplicatedStudyError(Exception):  # pragma: no cover
     """
     `DuplicatedStudyError` is raised when a new tuner process is created with a study name that
     already exists in storage.
     """
 
 
-class NoRestartError(Exception):   # pragma: no cover
+class NoRestartError(Exception):  # pragma: no cover
     """
     `NoRestartError` is raised when an attempt is made to check how many seconds have transpired since
     the HPO process was last resumed from a halt. This can happen if the process has been terminated
@@ -497,7 +497,6 @@ def _get_param_distributions(self, strategy):
         return param_distributions
 
     def _check_search_space(self, param_distributions):
-
         validate_search_space(self.model.get_params().keys(), param_distributions)
 
     def _check_is_fitted(self):
@@ -1044,7 +1043,7 @@ def _extract_estimator(self):
     def _extract_scoring_name(self):
         if isinstance(self.scoring, str):
             return self.scoring
-        if self._scorer.__class__.__name__ != "function":
+        if not callable(self._scorer):
             return (
                 self._scorer
                 if isinstance(self._scorer, str)
diff --git a/ads/type_discovery/typed_feature.py b/ads/type_discovery/typed_feature.py
@@ -349,7 +349,7 @@ def sub_vectorization(
         unigrams = {
             k: int(v)
             for k, v in dict(
-                zip(v1.get_feature_names(), np.asarray(X1.sum(axis=0)).ravel())
+                zip(v1.get_feature_names_out(), np.asarray(X1.sum(axis=0)).ravel())
             ).items()
         }
 
@@ -366,7 +366,7 @@ def sub_vectorization(
         bigrams = {
             k: int(v)
             for k, v in dict(
-                zip(v2.get_feature_names(), np.asarray(X2.sum(axis=0)).ravel())
+                zip(v2.get_feature_names_out(), np.asarray(X2.sum(axis=0)).ravel())
             ).items()
         }
 
@@ -404,7 +404,6 @@ def vectorization(feature_name, series, mean_document_length):
 
     @staticmethod
     def build(name, series, is_cjk, is_html):
-
         internal = {"cjk": is_cjk, "html": is_html}
 
         if is_cjk:
diff --git a/setup.py b/setup.py
@@ -29,7 +29,7 @@
     "python_jsonschema_objects>=0.3.13",
     "PyYAML>=6",  # pyyaml 5.4 is broken with cython 3
     "requests",
-    "scikit-learn>=0.23.2,<1.2",
+    "scikit-learn>=1.0",
     "tabulate>=0.8.9",
     "tqdm>=4.59.0",
     "psutil>=5.7.2",
diff --git a/tests/unitary/with_extras/hpo/test_hpo_search_space.py b/tests/unitary/with_extras/hpo/test_hpo_search_space.py
@@ -6,12 +6,10 @@
 """Contains tests for ads.hpo.search_space
 """
 
-import unittest
 import lightgbm
 import pytest
 import sklearn
 import xgboost
-import sys, mock
 
 from ads.hpo.stopping_criterion import *
 from ads.hpo.distributions import *