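Update TrAdaBoost: fix score() argument order (y_true, y_pred), revise beta_t and estimator-weight formulas, keep estimator errors as lists (use np.argmin), and add multiclass / multi-output / learning-rate tests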

antoinedemathelin · antoinedemathelin · commit 27e132023996 · 2022-03-12T09:45:05.000+01:00
diff --git a/adapt/instance_based/_tradaboost.py b/adapt/instance_based/_tradaboost.py
@@ -275,10 +275,9 @@ def fit(self, X, y, Xt=None, yt=None,
             sample_weight_src = sample_weight_src / sum_weights
             sample_weight_tgt = sample_weight_tgt / sum_weights
 
-        self.estimator_errors_ = np.array(self.estimator_errors_)
-        self.estimator_weights_ = np.array([
-            -np.log(err / (1-err) + EPS) + 2*EPS
-            for err in self.estimator_errors_])
+        self.estimator_weights_ = [
+            -np.log(err / (2.-err) + EPS) + 2*EPS
+            for err in self.estimator_errors_]
         return self
         
         
@@ -339,10 +338,10 @@ def _boost(self, iboost, Xs, ys, Xt, yt,
                                sample_weight_tgt.sum())
         
         # For multiclassification and regression error can be greater than 0.5
-        # if estimator_error > 0.5:
-        #     estimator_error = 0.5
+        # if estimator_error > 0.49:
+        #     estimator_error = 0.49
         
-        beta_t = 2*estimator_error / (2. - estimator_error)
+        beta_t = estimator_error / (2. - estimator_error)
         
         beta_s = 1. / (1. + np.sqrt(
             2. * np.log(len(Xs)) / self.n_estimators
@@ -386,7 +385,8 @@ def predict(self, X):
         """
         X = check_array(X)
         N = len(self.estimators_)
-        weights = self.estimator_weights_[int(N/2):]
+        weights = np.array(self.estimator_weights_)
+        weights = weights[int(N/2):]
         predictions = []
         for est in self.estimators_[int(N/2):]:
             if isinstance(est, BaseEstimator):
@@ -457,9 +457,9 @@ def score(self, X, y):
         X, y = check_arrays(X, y)
         yp = self.predict(X)
         if isinstance(self, TrAdaBoostR2):
-            score = r2_score(yp, y)
+            score = r2_score(y, yp)
         else:
-            score = accuracy_score(yp, y)
+            score = accuracy_score(y, yp)
         return score
 
 
@@ -589,7 +589,7 @@ def predict(self, X):
         """
         X = check_array(X)
         N = len(self.estimators_)
-        weights = self.estimator_weights_
+        weights = np.array(self.estimator_weights_)
         weights = weights[int(N/2):]
         predictions = []
         for est in self.estimators_[int(N/2):]:
@@ -811,13 +811,13 @@ def fit(self, X, y, Xt=None, yt=None,
                 print("Iteration %i - Cross-validation score: %.4f (%.4f)"%
                       (iboost, np.mean(cv_score), np.std(cv_score)))
             
-            self.estimator_errors_.append(cv_score.mean())
-            
             sample_weight_src, sample_weight_tgt = self._boost(
                 iboost, Xs, ys, Xt, yt,
                 sample_weight_src, sample_weight_tgt,
                 **fit_params
             )
+            
+            self.estimator_errors_.append(cv_score.mean())
 
             if sample_weight_src is None:
                 break
@@ -827,7 +827,6 @@ def fit(self, X, y, Xt=None, yt=None,
             sample_weight_src = sample_weight_src / sum_weights
             sample_weight_tgt = sample_weight_tgt / sum_weights
 
-        self.estimator_errors_ = np.array(self.estimator_errors_)
         return self
 
 
@@ -959,7 +958,7 @@ def predict(self, X):
         """
         X = check_array(X)
         best_estimator = self.estimators_[
-            self.estimator_errors_.argmin()]
+            np.argmin(self.estimator_errors_)]
         return best_estimator.predict(X)
 
 
@@ -984,7 +983,7 @@ def predict_weights(self, domain="src"):
         weights : source sample weights
         """
         if hasattr(self, "sample_weights_src_"):
-            arg = self.estimator_errors_.argmin()
+            arg = np.argmin(self.estimator_errors_)
             if domain in ["src", "source"]:
                 return self.sample_weights_src_[arg]
             elif domain in ["tgt", "target"]:
diff --git a/tests/test_tradaboost.py b/tests/test_tradaboost.py
@@ -4,7 +4,8 @@
 
 import copy
 import numpy as np
-from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge
+from sklearn.metrics import r2_score, accuracy_score
 import tensorflow as tf
 
 from adapt.instance_based import (TrAdaBoost,
@@ -34,6 +35,8 @@ def test_tradaboost_fit():
                        solver='lbfgs'),
                        n_estimators=20)
     model.fit(Xs, ys_classif, Xt=Xt[:10], yt=yt_classif[:10])
+    score = model.score(Xs, ys_classif)
+    assert score == accuracy_score(ys_classif, model.predict(Xs))
     assert len(model.sample_weights_src_[0]) == 100
     assert (model.sample_weights_src_[0][:50].sum() ==
             model.sample_weights_src_[0][50:].sum())
@@ -58,13 +61,18 @@ def test_tradaboost_fit_keras_model():
     model.fit(Xs, np.random.random((100, 2)),
               Xt=Xt[:10], yt=np.random.random((10, 2)))
     
+    score = model.score(Xs, ys_classif)
+    assert score == accuracy_score(ys_classif, model.predict(Xs))
+    
     
 def test_tradaboostr2_fit():
     np.random.seed(0)
     model = TrAdaBoostR2(LinearRegression(fit_intercept=False),
                          n_estimators=100,
                          Xt=Xt[:10], yt=yt_reg[:10])
     model.fit(Xs, ys_reg)
+    score = model.score(Xs, ys_reg)
+    assert score == r2_score(ys_reg, model.predict(Xs))
     assert np.abs(model.estimators_[-1].coef_[0] - 1.) < 1
     assert np.abs(model.sample_weights_src_[-1][:50].sum() / 
             model.sample_weights_src_[-1][50:].sum()) > 10
@@ -80,7 +88,9 @@ def test_twostagetradaboostr2_fit():
     np.random.seed(0)
     model = TwoStageTrAdaBoostR2(LinearRegression(fit_intercept=False),
                          n_estimators=10)
-    model.fit(Xs, ys_reg.ravel(), Xt=Xt[:10], yt=yt_reg[:10].ravel())    
+    model.fit(Xs, ys_reg.ravel(), Xt=Xt[:10], yt=yt_reg[:10].ravel())
+    score = model.score(Xs, ys_reg)
+    assert score == r2_score(ys_reg, model.predict(Xs))
     assert np.abs(model.estimators_[-1].estimators_[-1].coef_[0]
            - 1.) < 1
     assert np.abs(model.sample_weights_src_[-1][:50].sum() / 
@@ -103,3 +113,75 @@ def test_tradaboost_deepcopy():
     copy_model = copy.deepcopy(model)
     assert np.all(model.predict(Xt) == copy_model.predict(Xt))
     assert hex(id(model)) != hex(id(copy_model))
+    
+    
+def test_tradaboost_multiclass():
+    np.random.seed(0)
+    X = np.random.randn(10, 3)
+    y = np.random.choice(3, 10)
+    model = TrAdaBoost(LogisticRegression(penalty='none',
+                       solver='lbfgs'), Xt=X, yt=y,
+                       n_estimators=20)
+    model.fit(X, y)
+    yp = model.predict(X)
+    score = model.score(X, y)
+    assert set(np.unique(yp)) == set([0,1,2])
+    assert score == accuracy_score(y, yp)
+    
+    
+def test_tradaboost_multireg():
+    np.random.seed(0)
+    X = np.random.randn(10, 3)
+    y = np.random.randn(10, 5)
+    model = TrAdaBoostR2(LinearRegression(),
+                         Xt=X, yt=y, 
+                         n_estimators=20)
+    model.fit(X, y)
+    yp = model.predict(X)
+    score = model.score(X, y)
+    assert np.all(yp.shape == (10, 5))
+    assert score == r2_score(y, yp)
+    
+    model = TwoStageTrAdaBoostR2(LinearRegression(),
+                         Xt=X, yt=y, 
+                         n_estimators=3,
+                         n_estimators_fs=3)
+    model.fit(X, y)
+    yp = model.predict(X)
+    score = model.score(X, y)
+    assert np.all(yp.shape == (10, 5))
+    assert score == r2_score(y, yp)
+    
+    
+def test_tradaboost_above_05():
+    np.random.seed(0)
+    X = np.random.randn(10, 3)
+    y = np.random.randn(10, 5)
+    model = TrAdaBoostR2(LinearRegression(),
+                         Xt=Xt[:10], yt=yt_reg[:10], 
+                         n_estimators=20)
+    model.fit(Xs, ys_reg)
+    assert np.any(np.array(model.estimator_errors_)>0.5)
+    
+    model = TrAdaBoostR2(Ridge(1.),
+                         Xt=Xt[:20], yt=yt_reg[:20], 
+                         n_estimators=20)
+    model.fit(Xs, ys_reg)
+    assert np.all(np.array(model.estimator_errors_)<0.5)
+    
+    
+def test_tradaboost_lr():
+    np.random.seed(0)
+    model = TrAdaBoost(LogisticRegression(penalty='none'),
+                         Xt=Xt[:10], yt=yt_classif[:10], 
+                         n_estimators=20, lr=.1)
+    model.fit(Xs, ys_classif)
+    err1 = model.estimator_errors_
+    
+    model = TrAdaBoost(LogisticRegression(penalty='none'),
+                         Xt=Xt[:10], yt=yt_classif[:10], 
+                         n_estimators=20, lr=2.)
+    model.fit(Xs, ys_classif)
+    err2 = model.estimator_errors_
+    
+    assert np.sum(err1) > 10 * np.sum(err2)