adapt-python
diff --git a/‎adapt/base.py
Lines changed: 7 additions & 9 deletions b/‎adapt/base.py
Lines changed: 7 additions & 9 deletions
diff --git a/‎adapt/instance_based/_ldm.py
Lines changed: 2 additions & 0 deletions b/‎adapt/instance_based/_ldm.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎adapt/instance_based/_tradaboost.py
Lines changed: 110 additions & 53 deletions b/‎adapt/instance_based/_tradaboost.py
Lines changed: 110 additions & 53 deletions
@@ -282,8 +282,8 @@ def unsupervised_score(self, Xs, Xt):
         score : float
             Unsupervised score.
         """
-        Xs = check_array(np.array(Xs))
-        Xt = check_array(np.array(Xt))
+        Xs = check_array(Xs, accept_sparse=True)
+        Xt = check_array(Xt, accept_sparse=True)
 
         if hasattr(self, "transform"):
             args = [
@@ -306,13 +306,11 @@ def unsupervised_score(self, Xs, Xt):
 
             set_random_seed(self.random_state)
             bootstrap_index = np.random.choice(
-            len(Xs), size=len(Xs), replace=True, p=sample_weight)
+            Xs.shape[0], size=Xs.shape[0], replace=True, p=sample_weight)
             Xs = Xs[bootstrap_index]
         else:
             raise ValueError("The Adapt model should implement"
                              " a transform or predict_weights methods")
-        Xs = np.array(Xs)
-        Xt = np.array(Xt)
         return normalized_linear_discrepancy(Xs, Xt)
 
 
@@ -534,7 +532,7 @@ def fit_estimator(self, X, y, sample_weight=None,
         -------
         estimator_ : fitted estimator
         """
-        X, y = check_arrays(X, y)
+        X, y = check_arrays(X, y, accept_sparse=True)
         set_random_seed(random_state)
 
         if (not warm_start) or (not hasattr(self, "estimator_")):
@@ -613,7 +611,7 @@ def predict_estimator(self, X, **predict_params):
         y_pred : array
             prediction of estimator.
         """      
-        X = check_array(X, ensure_2d=True, allow_nd=True)
+        X = check_array(X, ensure_2d=True, allow_nd=True, accept_sparse=True)
         predict_params = self._filter_params(self.estimator_.predict,
                                             predict_params)
         return self.estimator_.predict(X, **predict_params)
@@ -648,7 +646,7 @@ def predict(self, X, domain=None, **predict_params):
         y_pred : array
             prediction of the Adapt Model.
         """
-        X = check_array(X, ensure_2d=True, allow_nd=True)
+        X = check_array(X, ensure_2d=True, allow_nd=True, accept_sparse=True)
         if hasattr(self, "transform"):
             if domain is None:
                 domain = "tgt"
@@ -700,7 +698,7 @@ def score(self, X, y, sample_weight=None, domain=None):
         score : float
             estimator score.
         """
-        X, y = check_arrays(X, y)
+        X, y = check_arrays(X, y, accept_sparse=True)
 
         if domain is None:
             domain = "target"
 
@@ -142,6 +142,8 @@ def fit_weights(self, Xs, Xt, **kwargs):
             print("Final Discrepancy : %f"%sol['primal objective'])
 
         self.weights_ = np.array(sol["x"]).ravel()
+        self.lambda_ = self.weights_[0]
+        self.weights_ = np.clip(self.weights_[1:], 0., np.inf)
         return self.weights_
 
 
 
@@ -8,27 +8,29 @@
 from sklearn.exceptions import NotFittedError
 from sklearn.utils import check_array
 from sklearn.metrics import r2_score, accuracy_score
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from scipy.sparse import vstack, issparse
 
 from adapt.base import BaseAdaptEstimator, make_insert_doc
 from adapt.utils import check_arrays, check_estimator, set_random_seed
 
 EPS = np.finfo(float).eps
 
-def _get_median_predict(X, predictions, weights):
+def _get_median_predict(predictions, weights):
     sorted_idx = np.argsort(predictions, axis=-1)
     # Find index of median prediction for each sample
     weight_cdf = np.cumsum(weights[sorted_idx], axis=-1)
     median_or_above = weight_cdf >= 0.5 * weight_cdf[..., -1][..., np.newaxis]
     median_idx = median_or_above.argmax(axis=-1)
     new_predictions = None
     for i in range(median_idx.shape[1]):
-        median_estimators = sorted_idx[np.arange(len(X)), i, median_idx[:, i]]
+        median_estimators = sorted_idx[np.arange(len(predictions)), i, median_idx[:, i]]
         if new_predictions is None:
-            new_predictions = predictions[np.arange(len(X)), i, median_estimators].reshape(-1,1)
+            new_predictions = predictions[np.arange(len(predictions)), i, median_estimators].reshape(-1,1)
         else:
             new_predictions = np.concatenate((
                 new_predictions,
-                predictions[np.arange(len(X)), i, median_estimators].reshape(-1,1)
+                predictions[np.arange(len(predictions)), i, median_estimators].reshape(-1,1)
             ), axis=1)
     return new_predictions
 
@@ -229,12 +231,17 @@ def fit(self, X, y, Xt=None, yt=None,
         set_random_seed(self.random_state)
         tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
 
-        Xs, ys = check_arrays(X, y)
+        Xs, ys = check_arrays(X, y, accept_sparse=True)
         Xt, yt = self._get_target_data(Xt, yt)
-        Xt, yt = check_arrays(Xt, yt)
+        Xt, yt = check_arrays(Xt, yt, accept_sparse=True)
 
-        n_s = len(Xs) 
-        n_t = len(Xt)
+        if not isinstance(self, TrAdaBoostR2) and isinstance(self.estimator, BaseEstimator):
+            self.label_encoder_ = LabelEncoder()
+            ys = self.label_encoder_.fit_transform(ys)
+            yt = self.label_encoder_.transform(yt)
+        
+        n_s = Xs.shape[0]
+        n_t = Xt.shape[0]
 
         if sample_weight_src is None:
             sample_weight_src = np.ones(n_s) / (n_s + n_t)
@@ -284,8 +291,11 @@ def fit(self, X, y, Xt=None, yt=None,
     def _boost(self, iboost, Xs, ys, Xt, yt,
                sample_weight_src, sample_weight_tgt,
                **fit_params):
-
-        X = np.concatenate((Xs, Xt))
+        
+        if issparse(Xs):
+            X = vstack((Xs, Xt))
+        else:
+            X = np.concatenate((Xs, Xt))
         y = np.concatenate((ys, yt))
         sample_weight = np.concatenate((sample_weight_src,
                                         sample_weight_tgt))
@@ -297,39 +307,72 @@ def _boost(self, iboost, Xs, ys, Xt, yt,
                                        warm_start=False,
                                        **fit_params)
 
-        ys_pred = estimator.predict(Xs)
-        yt_pred = estimator.predict(Xt)
+        if hasattr(estimator, "predict_proba"):
+            ys_pred = estimator.predict_proba(Xs)
+            yt_pred = estimator.predict_proba(Xt)
+        elif hasattr(estimator, "_predict_proba_lr"):
+            ys_pred = estimator._predict_proba_lr(Xs)
+            yt_pred = estimator._predict_proba_lr(Xt)
+        else:
+            ys_pred = estimator.predict(Xs)
+            yt_pred = estimator.predict(Xt)
 
-        if ys_pred.ndim == 1 or ys.ndim == 1:
+        if ys.ndim == 1:
             ys = ys.reshape(-1, 1)
             yt = yt.reshape(-1, 1)
+            
+        if ys_pred.ndim == 1:
             ys_pred = ys_pred.reshape(-1, 1)
             yt_pred = yt_pred.reshape(-1, 1)
 
-        if isinstance(self, TrAdaBoostR2):
+        if not isinstance(self, TrAdaBoostR2):
+            if isinstance(estimator, BaseEstimator):
+                ohe = OneHotEncoder(sparse=False)
+                ohe.fit(y.reshape(-1, 1))
+                ys = ohe.transform(ys)
+                yt = ohe.transform(yt)
+                
+                if ys_pred.shape[1] == 1:
+                    ys_pred = ohe.transform(ys_pred)
+                    yt_pred = ohe.transform(yt_pred)
+                    
+                error_vect_src = np.abs(ys_pred - ys).sum(tuple(range(1, ys.ndim))) / 2.
+                error_vect_tgt = np.abs(yt_pred - yt).sum(tuple(range(1, yt.ndim))) / 2.
+                    
+            else:
+                assert np.all(ys_pred.shape == ys.shape)
+                error_vect_src = np.abs(ys_pred - ys).sum(tuple(range(1, ys.ndim)))
+                error_vect_tgt = np.abs(yt_pred - yt).sum(tuple(range(1, yt.ndim)))
+                
+                if ys.ndim != 1:
+                    error_vect_src /= 2.
+                    error_vect_tgt /= 2.
+                    
+        else:
             error_vect_src = np.abs(ys_pred - ys).mean(tuple(range(1, ys.ndim)))
             error_vect_tgt = np.abs(yt_pred - yt).mean(tuple(range(1, yt.ndim)))
-            error_vect = np.concatenate((error_vect_src, error_vect_tgt))
 
-            error_max = error_vect.max() + EPS
-            if error_max != 0:
-                error_vect /= error_max
+            error_max = max(error_vect_src.max(), error_vect_tgt.max())+ EPS
+            if error_max > 0:
                 error_vect_src /= error_max
                 error_vect_tgt /= error_max
-        else:
-            if isinstance(estimator, BaseEstimator):
-                error_vect_src = (ys_pred != ys).astype(float).ravel()
-                error_vect_tgt = (yt_pred != yt).astype(float).ravel()
-                error_vect = np.concatenate((error_vect_src, error_vect_tgt))
-            else:
-                if ys.shape[1] == 1:
-                    error_vect_src = (np.abs(ys_pred - ys) > 0.5).astype(float).ravel()
-                    error_vect_tgt = (np.abs(yt_pred - yt) > 0.5).astype(float).ravel()
-                else:
-                    error_vect_src = (ys_pred.argmax(1) != ys.argmax(1)).astype(float).ravel()
-                    error_vect_tgt = (yt_pred.argmax(1) != yt.argmax(1)).astype(float).ravel()
+        
+        # else:
+        #     if isinstance(estimator, BaseEstimator):
+        #         error_vect_src = (ys_pred != ys).astype(float).ravel()
+        #         error_vect_tgt = (yt_pred != yt).astype(float).ravel()
+        #         error_vect = np.concatenate((error_vect_src, error_vect_tgt))
+        #     else:
+        #         if ys.shape[1] == 1:
+        #             error_vect_src = (np.abs(ys_pred - ys) > 0.5).astype(float).ravel()
+        #             error_vect_tgt = (np.abs(yt_pred - yt) > 0.5).astype(float).ravel()
+        #         else:
+        #             error_vect_src = (ys_pred.argmax(1) != ys.argmax(1)).astype(float).ravel()
+        #             error_vect_tgt = (yt_pred.argmax(1) != yt.argmax(1)).astype(float).ravel()
 
         error_vect = np.concatenate((error_vect_src, error_vect_tgt))
+        
+        assert sample_weight.ndim == error_vect.ndim
 
         if isinstance(self, _AdaBoostR2):
             estimator_error = (sample_weight * error_vect).sum()
@@ -341,10 +384,16 @@ def _boost(self, iboost, Xs, ys, Xt, yt,
         # if estimator_error > 0.49:
         #     estimator_error = 0.49
 
+        self.estimators_.append(estimator)
+        self.estimator_errors_.append(estimator_error)
+        
+        if estimator_error <= 0.:
+            return None, None
+        
         beta_t = estimator_error / (2. - estimator_error)
 
         beta_s = 1. / (1. + np.sqrt(
-            2. * np.log(len(Xs)) / self.n_estimators
+            2. * np.log(Xs.shape[0]) / self.n_estimators
         ))
 
         if not iboost == self.n_estimators - 1:
@@ -362,9 +411,6 @@ def _boost(self, iboost, Xs, ys, Xt, yt,
                 # Target updating weights
                 sample_weight_tgt *= np.power(
                     beta_t, - self.lr * error_vect_tgt)
-
-        self.estimators_.append(estimator)
-        self.estimator_errors_.append(estimator_error)
 
         return sample_weight_src, sample_weight_tgt
 
@@ -383,14 +429,21 @@ def predict(self, X):
         y_pred : array
             Vote results.
         """
-        X = check_array(X)
+        X = check_array(X, ensure_2d=True, allow_nd=True, accept_sparse=True)
         N = len(self.estimators_)
         weights = np.array(self.estimator_weights_)
         weights = weights[int(N/2):]
         predictions = []
         for est in self.estimators_[int(N/2):]:
             if isinstance(est, BaseEstimator):
-                y_pred = est.predict_proba(X)
+                if hasattr(est, "predict_proba"):
+                    y_pred = est.predict_proba(X)
+                elif hasattr(est, "_predict_proba_lr"):
+                    y_pred = est._predict_proba_lr(X)
+                else:
+                    labels = est.predict(X)
+                    y_pred = np.zeros((len(labels), int(max(labels))+1))
+                    y_pred[np.arange(len(labels)), labels] = 1.
             else:
                 y_pred = est.predict(X)
                 if y_pred.ndim == 1:
@@ -401,7 +454,10 @@ def predict(self, X):
             predictions.append(y_pred)
         predictions = np.stack(predictions, -1)
         weighted_vote = predictions.dot(weights).argmax(1)
-        return weighted_vote
+        if hasattr(self, "label_encoder_"):
+            return self.label_encoder_.inverse_transform(weighted_vote)
+        else:
+            return weighted_vote
 
 
     def predict_weights(self, domain="src"):
@@ -454,7 +510,7 @@ def score(self, X, y):
         score : float
             estimator score.
         """
-        X, y = check_arrays(X, y)
+        X, y = check_arrays(X, y, accept_sparse=True)
         yp = self.predict(X)
         if isinstance(self, TrAdaBoostR2):
             score = r2_score(y, yp)
@@ -587,7 +643,7 @@ def predict(self, X):
         y_pred : array
             Median results.
         """
-        X = check_array(X)
+        X = check_array(X, ensure_2d=True, allow_nd=True, accept_sparse=True)
         N = len(self.estimators_)
         weights = np.array(self.estimator_weights_)
         weights = weights[int(N/2):]
@@ -598,7 +654,7 @@ def predict(self, X):
                 y_pred = y_pred.reshape(-1, 1)
             predictions.append(y_pred)
         predictions = np.stack(predictions, -1)
-        return _get_median_predict(X, predictions, weights)
+        return _get_median_predict(predictions, weights)
 
 
 class _AdaBoostR2(TrAdaBoostR2):
@@ -770,12 +826,12 @@ def fit(self, X, y, Xt=None, yt=None,
         set_random_seed(self.random_state)
         tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
 
-        Xs, ys = check_arrays(X, y)
+        Xs, ys = check_arrays(X, y, accept_sparse=True)
         Xt, yt = self._get_target_data(Xt, yt)
-        Xt, yt = check_arrays(Xt, yt)
+        Xt, yt = check_arrays(Xt, yt, accept_sparse=True)
 
-        n_s = len(Xs) 
-        n_t = len(Xt)
+        n_s = Xs.shape[0]
+        n_t = Xt.shape[0]
 
         if sample_weight_src is None:
             sample_weight_src = np.ones(n_s) / (n_s + n_t)
@@ -786,9 +842,6 @@ def fit(self, X, y, Xt=None, yt=None,
                        sample_weight_tgt.sum())
         sample_weight_src = sample_weight_src / sum_weights
         sample_weight_tgt = sample_weight_tgt / sum_weights
-        
-        X = np.concatenate((Xs, Xt))
-        y = np.concatenate((ys, yt))
 
         self.sample_weights_src_ = []
         self.sample_weights_tgt_ = []
@@ -901,13 +954,13 @@ def func(x):
     def _cross_val_score(self, Xs, ys, Xt, yt,
                          sample_weight_src, sample_weight_tgt,
                          **fit_params):
-        if len(Xt) >= self.cv:
+        if Xt.shape[0] >= self.cv:
             cv = self.cv
         else:
-            cv = len(Xt)
+            cv = Xt.shape[0]
 
-        tgt_index = np.arange(len(Xt))
-        split = int(len(Xt) / cv)
+        tgt_index = np.arange(Xt.shape[0])
+        split = int(Xt.shape[0] / cv)
         scores = []
         for i in range(cv):
             if i == cv-1:
@@ -916,7 +969,11 @@ def _cross_val_score(self, Xs, ys, Xt, yt,
                 test_index = tgt_index[i * split: (i + 1) * split]
             train_index = list(set(tgt_index) - set(test_index))
 
-            X = np.concatenate((Xs, Xt[train_index]))
+                    
+            if issparse(Xs):
+                X = vstack((Xs, Xt[train_index]))
+            else:
+                X = np.concatenate((Xs, Xt[train_index]))
             y = np.concatenate((ys, yt[train_index]))
             sample_weight = np.concatenate((sample_weight_src,
                                             sample_weight_tgt[train_index]))
@@ -956,7 +1013,7 @@ def predict(self, X):
         y_pred : array
             Best estimator predictions.
         """
-        X = check_array(X)
+        X = check_array(X, ensure_2d=True, allow_nd=True, accept_sparse=True)
         best_estimator = self.estimators_[
             np.argmin(self.estimator_errors_)]
         return best_estimator.predict(X)