diff --git a/proglearn/deciders.py b/proglearn/deciders.py index 3d5c3412bb..0f4f13c7c2 100755 --- a/proglearn/deciders.py +++ b/proglearn/deciders.py @@ -89,6 +89,7 @@ def fit( self.classes = np.array(self.classes) self.transformer_id_to_transformers_ = transformer_id_to_transformers self.transformer_id_to_voters_ = transformer_id_to_voters + return self def predict_proba(self, X, transformer_ids=None): @@ -115,7 +116,6 @@ def predict_proba(self, X, transformer_ids=None): y_proba_hat : ndarray of shape [n_samples, n_classes] posteriors per example - Raises ------ NotFittedError @@ -123,6 +123,7 @@ def predict_proba(self, X, transformer_ids=None): """ check_is_fitted(self) vote_per_transformer_id = [] + prior_posterior_per_id = [] for transformer_id in ( transformer_ids if transformer_ids is not None @@ -130,18 +131,38 @@ ): check_is_fitted(self) vote_per_bag_id = [] + prior_posterior_per_bag = [] for bag_id in range( len(self.transformer_id_to_transformers_[transformer_id]) ): transformer = self.transformer_id_to_transformers_[transformer_id][ bag_id ] + # X.shape = (n_samples, n_features) X_transformed = transformer.transform(X) + # X_transformed.shape = (n_samples,), one leaf index per sample voter = self.transformer_id_to_voters_[transformer_id][bag_id] vote = voter.predict_proba(X_transformed) + # vote.shape = (n_samples, n_classes) vote_per_bag_id.append(vote) - vote_per_transformer_id.append(np.mean(vote_per_bag_id, axis=0)) - return np.mean(vote_per_transformer_id, axis=0) + + prior_posterior_per_bag.append(voter.prior_posterior_) + # Each sample gets the average over trees; all-zero ("ignored") votes are excluded from the mean + # vote_per_bag_id.shape = (n_estimators, n_samples, n_classes) + transformer_vote = np.sum(vote_per_bag_id, axis=0) + # Each retained vote sums to 1 over classes, so this counts, per sample, the trees that voted + num_transformers = np.sum(vote_per_bag_id, axis=2).sum(axis=0)[:, None] + vote_per_transformer_id.append(np.divide( + transformer_vote, num_transformers, out=np.zeros_like(transformer_vote), where=num_transformers != 0)) + + prior_posterior_per_id.append(np.mean(prior_posterior_per_bag, axis=0)) + + # vote_per_transformer_id.shape = (n_transformer_ids, n_samples, n_classes) + predicted_posteriors = np.mean(vote_per_transformer_id, axis=0) + # Correction for samples not predicted by any tree + unknown_sample_indices = np.where(np.sum(predicted_posteriors, axis=1) == 0)[0] + predicted_posteriors[unknown_sample_indices] = np.mean(prior_posterior_per_id, axis=0) + + return predicted_posteriors def predict(self, X, transformer_ids=None): """ diff --git a/proglearn/forest.py b/proglearn/forest.py index 80bac5ea3b..75db56cd77 100644 --- a/proglearn/forest.py +++ b/proglearn/forest.py @@ -9,6 +9,7 @@ import numpy as np +from sklearn.ensemble import RandomForestClassifier from sklearn.utils.validation import check_X_y, check_array @@ -35,6 +36,30 @@ class LifelongClassificationForest(ClassificationProgressiveLearner): The maximum depth of a tree in the Lifelong Classification Forest. This is used if 'max_depth' is not fed to add_task. + n_jobs : int, default=None + The number of jobs to run in parallel. ``-1`` means use all + processors; ``None`` means 1. + + max_samples : int or float, default=None + The number of samples to draw from X (without replacement) to train + each tree. + - If None, then draw `X.shape[0]` samples. + - If int, then draw `max_samples` samples. + - If float, then draw `max_samples * X.shape[0]` samples. Thus, + `max_samples` should be in the interval `(0, 1)`.
+ + Note: The number of samples used to learn the tree will be further + reduced per the `tree_construction_proportion` value. + + honest_prior : {"ignore", "uniform", "empirical"}, default="ignore" + Method for dealing with empty leaves during evaluation of a test + sample. If "ignore", trees in which the leaf is empty are not used in + the prediction. If "uniform", the prior tree posterior is 1/(number of + classes). If "empirical", the prior tree posterior is the relative + class frequency in the voting subsample. If all tree leaves are empty, + "ignore" will use the empirical prior and the others will use their + respective priors. + Attributes ---------- pl_ : ClassificationProgressiveLearner @@ -48,11 +73,17 @@ def __init__( default_tree_construction_proportion=0.67, default_kappa=np.inf, default_max_depth=30, + max_samples=None, + n_jobs=None, + honest_prior="ignore", ): self.default_n_estimators = default_n_estimators self.default_tree_construction_proportion = default_tree_construction_proportion self.default_kappa = default_kappa self.default_max_depth = default_max_depth + self.max_samples = max_samples + self.n_jobs = n_jobs + self.honest_prior = honest_prior self.pl_ = ClassificationProgressiveLearner( default_transformer_class=TreeClassificationTransformer, @@ -61,6 +92,7 @@ def __init__( default_voter_kwargs={"kappa": default_kappa}, default_decider_class=SimpleArgmaxAverage, default_decider_kwargs={}, + n_jobs=n_jobs, ) def add_task( @@ -72,6 +104,7 @@ def add_task( tree_construction_proportion="default", kappa="default", max_depth="default", + transformer_kwargs={}, ): """ adds a task with id task_id, max tree depth max_depth, given input data matrix X @@ -103,15 +136,20 @@ def add_task( The coefficient for finite sample correction. The default is used if 'default' is provided. + TODO prune max_depth into transformer_kwargs max_depth : int or str, default='default' The maximum depth of a tree in the Lifelong Classification Forest. The default is used if 'default' is provided. + transformer_kwargs : dict, default={} + Additional named arguments to be passed to the transformer. + Returns ------- self : LifelongClassificationForest The object itself. 
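+
+        Examples
+        --------
+        A minimal usage sketch; the data and ``task_id`` below are
+        illustrative, not taken from the package::
+
+            X = np.concatenate((np.zeros((10, 2)), np.ones((10, 2))))
+            y = [0] * 10 + [1] * 10
+            l2f = LifelongClassificationForest(default_n_estimators=10)
+            l2f.add_task(X, y, task_id=0)
+            y_hat = l2f.predict(X, task_id=0)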
""" + # TODO get rid of defaults in favor of None if n_estimators == "default": n_estimators = self.default_n_estimators if tree_construction_proportion == "default": @@ -121,21 +159,34 @@ def add_task( if max_depth == "default": max_depth = self.default_max_depth + # TODO eliminate by subsuming max_depth + if not "fit_kwargs" in transformer_kwargs.keys(): + transformer_kwargs["fit_kwargs"] = {} + transformer_kwargs["fit_kwargs"]["max_depth"] = max_depth + X, y = check_X_y(X, y) + if isinstance(self.max_samples, int): + assert self.max_samples > 1 + max_samples = min(1, self.max_samples / X.shape[0]) + elif self.max_samples is None: + max_samples = 1.0 + else: + max_samples = self.max_samples return self.pl_.add_task( X, y, task_id=task_id, transformer_voter_decider_split=[ - tree_construction_proportion, - 1 - tree_construction_proportion, + tree_construction_proportion * max_samples, + (1 - tree_construction_proportion) * max_samples, 0, ], num_transformers=n_estimators, - transformer_kwargs={"kwargs": {"max_depth": max_depth}}, + transformer_kwargs=transformer_kwargs, voter_kwargs={ - "classes": np.unique(y), - "kappa": kappa, + "classes" : np.unique(y), + "kappa" : kappa, + "honest_prior" : self.honest_prior }, decider_kwargs={"classes": np.unique(y)}, ) @@ -208,7 +259,8 @@ def predict_proba(self, X, task_id): y_proba_hat : ndarray of shape [n_samples, n_classes] posteriors per example """ - return self.pl_.predict_proba(check_array(X), task_id) + X = check_array(X) + return self.pl_.predict_proba(X, task_id) def predict(self, X, task_id): """ @@ -232,7 +284,7 @@ def predict(self, X, task_id): class UncertaintyForest: """ - A class used to represent an uncertainty forest. + A class used to represent an Uncertainty Forest. Parameters ---------- @@ -240,34 +292,113 @@ class UncertaintyForest: The number of trees in the UncertaintyForest kappa : float, default=np.inf - The coefficient for finite sample correction. - If set to the default value, finite sample correction is not performed. + The coefficient for finite sample correction. If set to the default + value, finite sample correction is not performed. - max_depth : int, default=30 - The maximum depth of a tree in the UncertaintyForest + max_depth : int, default=None + The maximum depth of a tree in the UncertaintyForest. - tree_construction_proportion : float, default = 0.67 + tree_construction_proportion : float, default=0.5 The proportions of the input data set aside to train each decision tree. The remainder of the data is used to fill in voting posteriors. + max_features : {"auto", "sqrt", "log2"}, int or float, default="auto" + The number of features to consider when looking for the best split: + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a fraction and + `round(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)` (same as "auto"). + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + + poisson_sampler : boolean, default=False + To match the GRF theory [#1grf]_, if True, the number of features + considered at each tree are drawn from a poisson distribution with + mean equal to `max_features`. 
+ + n_jobs : int, default=None + The number of jobs to run in parallel. ``-1`` means use all + processors. None equates to 1. + + max_samples : int or float, default=None + The number of samples to draw from X (without replacement) to train + each tree. + - If None, then draw `X.shape[0]` samples. + - If int, then draw `max_samples` samples. + - If float, then draw `max_samples * X.shape[0]` samples. Thus, + `max_samples` should be in the interval `(0, 1)`. + + Note: The number of samples used to learn the tree will be further + reduced per the `tree_construction_proportion` value. + + honest_prior : {"ignore", "uniform", "empirical"}, default="ignore" + Method for dealing with empty leaves during evaluation of a test + sample. If "ignore", trees in which the leaf is empty are not used in + the prediction. If "uniform", the prior tree posterior is 1/(number of + classes). If "empirical", the prior tree posterior is the relative + class frequency in the voting subsample. If all tree leaves are empty, + "ignore" will use the empirical prior and the others will use their + respective priors. + + tree_kwargs : dict, default={} + Named arguments to be passed to each + sklearn.tree.DecisionTreeClassifier tree used in the construction + of the forest in addition to the above parameters. + Attributes ---------- + estimators_ : list of sklearn.tree.DecisionTreeClassifier + The collection of fitted trees. + + voters_ : list of proglearn.voter.TreeClassificationVoter + The collection of honest voters for leaves in matching trees in + `self.estimators_` at the same index. + lf_ : LifelongClassificationForest Internal LifelongClassificationForest used to train and make inference. + + n_features_ : int + The number of features when `fit` is performed. + + tree_kwargs_ : dict + Full set of keyword arguments passed to the Forest transformer. + + References + ---------- + .. [#1grf] Athey, Susan, Julie Tibshirani and Stefan Wager. + "Generalized Random Forests", Annals of Statistics, 2019. """ def __init__( self, n_estimators=100, kappa=np.inf, - max_depth=30, - tree_construction_proportion=0.67, + max_depth=None, + tree_construction_proportion=0.63, + max_features="auto", + poisson_sampler=False, + max_samples=None, + n_jobs=None, + honest_prior="ignore", + tree_kwargs={}, ): self.n_estimators = n_estimators self.kappa = kappa self.max_depth = max_depth self.tree_construction_proportion = tree_construction_proportion + self.max_features = max_features + self.poisson_sampler = poisson_sampler + self.max_samples = max_samples + self.n_jobs = n_jobs + self.tree_kwargs = tree_kwargs + self.honest_prior = honest_prior def fit(self, X, y): """ @@ -286,15 +417,43 @@ def fit(self, X, y): self : UncertaintyForest The object itself. 
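+
+        Examples
+        --------
+        A minimal sketch mirroring the data used in the accuracy test
+        (values are illustrative)::
+
+            X = np.ones((20, 4))
+            X[10:] *= -1
+            y = [0] * 10 + [1] * 10
+            uf = UncertaintyForest(n_estimators=10).fit(X, y)
+            y_proba = uf.predict_proba(X)  # shape (20, 2)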
""" + X, y = check_X_y(X, y) + self.n_features_ = X.shape[1] self.lf_ = LifelongClassificationForest( default_n_estimators=self.n_estimators, default_kappa=self.kappa, default_max_depth=self.max_depth, default_tree_construction_proportion=self.tree_construction_proportion, + max_samples=self.max_samples, + n_jobs=self.n_jobs, + honest_prior=self.honest_prior, ) - X, y = check_X_y(X, y) - return self.lf_.add_task(X, y, task_id=0) + self.tree_kwargs_ = { + "fit_kwargs": self.tree_kwargs, + "max_features": self.max_features, + "poisson_sampler": self.poisson_sampler, + } + self.lf_.add_task( + X, + y, + task_id=0, + transformer_kwargs=self.tree_kwargs_, + ) + + return self + + @property + def estimators_(self): + if not hasattr(self, "lf_"): + raise AttributeError("Model has not been fitted. Please fit first.") + return [t.transformer_ for t in self.lf_.pl_.transformer_id_to_transformers[0]] + + @property + def voters_(self): + if not hasattr(self, "lf_"): + raise AttributeError("Model has not been fitted. Please fit first.") + return [t for t in self.lf_.pl_.task_id_to_transformer_id_to_voters[0][0]] def predict_proba(self, X): """ @@ -310,7 +469,10 @@ def predict_proba(self, X): y_proba_hat : ndarray of shape [n_samples, n_classes] posteriors per example """ - return self.lf_.predict_proba(check_array(X), 0) + X = check_array(X) + if not hasattr(self, "lf_"): + raise AttributeError("Model has not been fitted. Please fit first.") + return self.lf_.predict_proba(X, 0) def predict(self, X): """ @@ -326,4 +488,7 @@ def predict(self, X): y_hat : ndarray of shape [n_samples] predicted class label per example """ - return self.lf_.predict(check_array(X), 0) + X = check_array(X) + if not hasattr(self, "lf_"): + raise AttributeError("Model has not been fitted. Please fit first.") + return self.lf_.predict(X, 0) diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index 4bfecc7f9c..f9aab814ae 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -1,9 +1,11 @@ """ -Main Author: Will LeVine +Main Author: Will LeVine Corresponding Email: levinewill@icloud.com """ import numpy as np +from joblib import Parallel, delayed from .base import BaseClassificationProgressiveLearner, BaseProgressiveLearner +from .transformers import TreeClassificationTransformer class ProgressiveLearner(BaseProgressiveLearner): @@ -41,6 +43,14 @@ class ProgressiveLearner(BaseProgressiveLearner): to the given string kwarg. This determines to which type of decider the progressive learner defaults if None is provided in any of the functions which add or set deciders. + honest_prior : {"ignore", "uniform", "empirical"}, default="ignore" + Method for dealing with empty paritions during evaluation of a test + sample. If "ignore", paritions in which the leaf is empty are not used in + the prediction. If "uniform", the prior posterior is 1/(number of + classes). If "empirical", the prior posterior is the relative + class frequency in the voting subsample. If all posteriors are empty, + "ignore" will use the empirical prior and the others will use their + respective priors. Attributes ---------- @@ -71,7 +81,7 @@ class ProgressiveLearner(BaseProgressiveLearner): and values of type obj corresponding to a transformer. This dictionary thus maps transformer ids to the corresponding transformers. 
- task_id_to_trasnformer_id_to_voters : dict + task_id_to_transformer_id_to_voters : dict A nested dictionary with outer key of type obj, corresponding to task ids inner key of type obj, corresponding to transformer ids, and values of type obj, corresponding to a voter. This dictionary thus maps @@ -137,6 +147,10 @@ class ProgressiveLearner(BaseProgressiveLearner): default_decider_kwargs : dict Stores the default decider kwargs as specified by the parameter default_decider_kwargs. + + n_jobs : int, default=None + The number of jobs to run in parallel when adding multiple + transformers per task. ``-1`` means use all processors; ``None`` + means 1. """ def __init__( @@ -147,6 +161,7 @@ def __init__( default_voter_kwargs=None, default_decider_class=None, default_decider_kwargs=None, + n_jobs=None, ): ( @@ -178,6 +193,8 @@ def __init__( self.default_decider_class = default_decider_class self.default_decider_kwargs = default_decider_kwargs + self.n_jobs = n_jobs + def get_transformer_ids(self): return np.array(list(self.transformer_id_to_transformers.keys())) @@ -186,7 +203,8 @@ def get_task_ids(self): def _append_transformer(self, transformer_id, transformer): if transformer_id in self.get_transformer_ids(): - self.transformer_id_to_transformers[transformer_id].append(transformer) + self.transformer_id_to_transformers[transformer_id].append( + transformer) else: self.transformer_id_to_transformers[transformer_id] = [transformer] @@ -211,7 +229,8 @@ def _append_voter_data_idx(self, task_id, bag_id, voter_data_idx): if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = voter_data_idx else: - self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} + self.task_id_to_bag_id_to_voter_data_idx[task_id] = { + bag_id: voter_data_idx} def _append_decider_idx(self, task_id, decider_idx): self.task_id_to_decider_idx[task_id] = decider_idx @@ -229,7 +248,8 @@ def _bifurcate_decider_idxs(self, ra, transformer_voter_decider_split): np.random.choice(ra, int(len(ra) * p), replace=False) for p in split ] else: - first_idx = np.random.choice(ra, int(len(ra) * split[0]), replace=False) + first_idx = np.random.choice( + ra, int(len(ra) * split[0]), replace=False) second_idx = np.random.choice( np.delete(ra, first_idx), int(len(ra) * split[1]), replace=False ) @@ -243,8 +263,57 @@ def set_transformer( transformer_data_idx=None, transformer_class=None, transformer_kwargs=None, + parallel=False, ): + if transformer_class is None: + if self.default_transformer_class is None: + raise ValueError( + "transformer_class is None and 'default_transformer_class' is None."
+ ) + else: + transformer_class = self.default_transformer_class + + if transformer_kwargs is None: + if self.default_transformer_kwargs is None: + raise ValueError( + """transformer_kwargs is None and + 'default_transformer_kwargs' is None.""" + ) + else: + transformer_kwargs = self.default_transformer_kwargs + + if transformer is not None and transformer.is_fitted() and parallel: + raise ValueError( + "Parallelization not implemented for fitted transformers") + elif parallel and ( + transformer_class == TreeClassificationTransformer + ): + # Possible solution to not recreate memory arrays, See + # sklearn/ensemble/_forest.py#L176 + n_samples = ( + self.transformer_id_to_X[transformer_id].shape[0] + if transformer_id in list(self.transformer_id_to_X.keys()) + else self.task_id_to_X[transformer_id].shape[0] + ) + sample_weight = np.ones((n_samples,), dtype=np.float64) + sample_counts = np.bincount( + transformer_data_idx, minlength=n_samples) + sample_weight *= sample_counts + transformer = transformer_class( + sample_weight=sample_weight, **transformer_kwargs).fit( + ( + self.transformer_id_to_X[transformer_id] + if transformer_id in list(self.transformer_id_to_X.keys()) + else self.task_id_to_X[transformer_id] + ), + ( + self.transformer_id_to_y[transformer_id] + if transformer_id in list(self.transformer_id_to_y.keys()) + else self.task_id_to_y[transformer_id] + )) + return transformer + if transformer_id is None: transformer_id = len(self.get_transformer_ids()) @@ -262,7 +331,7 @@ def set_transformer( X, y = X[transformer_data_idx], y[transformer_data_idx] if X is None and y is None: - if transformer.is_fitted(): + if transformer is not None and transformer.is_fitted(): self._append_transformer(transformer_id, transformer) else: raise ValueError( @@ -272,32 +341,21 @@ def set_transformer( # Type check X - if transformer_class is None: - if self.default_transformer_class is None: - raise ValueError( - "transformer_class is None and 'default_transformer_class' is None." 
- ) - else: - transformer_class = self.default_transformer_class - - if transformer_kwargs is None: - if self.default_transformer_kwargs is None: - raise ValueError( - """transformer_kwargs is None and - 'default_transformer_kwargs' is None.""" - ) - else: - transformer_kwargs = self.default_transformer_kwargs - # Fit transformer and new voter if y is None: + transformer = transformer_class(**transformer_kwargs).fit(X) + if parallel: + return transformer self._append_transformer( - transformer_id, transformer_class(**transformer_kwargs).fit(X) + transformer_id, transformer ) else: # Type check y + transformer = transformer_class(**transformer_kwargs).fit(X, y) + if parallel: + return transformer self._append_transformer( - transformer_id, transformer_class(**transformer_kwargs).fit(X, y) + transformer_id, transformer ) def set_voter( @@ -308,7 +366,7 @@ def set_voter( voter_kwargs=None, bag_id=None, ): - + # TODO parallelize, at least for trees # Type check X # Type check y @@ -351,8 +409,11 @@ if bag_id is None: transformers = self.transformer_id_to_transformers[transformer_id] else: - transformers = [self.transformer_id_to_transformers[transformer_id][bag_id]] - for transformer_num, transformer in enumerate(transformers): + transformers = [ + self.transformer_id_to_transformers[transformer_id][bag_id]] + + def _parallel_helper(transformer_num, transformer, parallel=True): if transformer_id == task_id: voter_data_idx = self.task_id_to_bag_id_to_voter_data_idx[task_id][ transformer_num @@ -361,13 +422,29 @@ ] else: voter_data_idx = np.delete( range(len(X)), self.task_id_to_decider_idx[task_id] ) - self._append_voter( - transformer_id, - task_id, - voter_class(**voter_kwargs).fit( - transformer.transform(X[voter_data_idx]), y[voter_data_idx] - ), - ) + voter = voter_class(**voter_kwargs).fit( + transformer.transform(X[voter_data_idx]), y[voter_data_idx]) + + if parallel: + return transformer_id, task_id, voter + else: + self._append_voter( + transformer_id, + task_id, + voter, + ) + + # Parallel loop over voter training + # TODO Remove or fix.
Tests show this causes drastic slowdown when in parallel + voter_info = Parallel(n_jobs=1)( + delayed(_parallel_helper)(transformer_num, transformer) + for transformer_num, transformer in enumerate(transformers) + ) + for transformer_id, task_id, voter in voter_info: + self._append_voter(transformer_id, task_id, voter) self.task_id_to_voter_class[task_id] = voter_class self.task_id_to_voter_kwargs[task_id] = voter_kwargs @@ -498,29 +575,46 @@ def add_transformer( if transformer_id not in list(self.task_id_to_y.keys()): self.transformer_id_to_y[transformer_id] = y - # train new transformers - for transformer_num in range(num_transformers): - if X is not None: - n = len(X) - elif y is not None: - n = len(y) - else: - n = None + if X is not None: + n = len(X) + elif y is not None: + n = len(y) + else: + n = None + + # transformer helper function + def _train_new_transformer(transformer_num): + # train new transformers if n is not None: transformer_data_idx = np.random.choice( transformer_voter_data_idx, int(transformer_data_proportion * n), - replace=False, + replace=False, # No bootstrapping ) else: transformer_data_idx = None - self.set_transformer( + + transformer = self.set_transformer( transformer_id=transformer_id, transformer_data_idx=transformer_data_idx, transformer_class=transformer_class, transformer_kwargs=transformer_kwargs, + parallel=True, + ) + voter_data_idx = np.setdiff1d( + transformer_voter_data_idx, transformer_data_idx) + + return transformer_num, transformer_id, transformer, voter_data_idx + + # Parallel loop over transformer training + ensemble = Parallel(n_jobs=self.n_jobs)( + delayed(_train_new_transformer)(num) for num in range(num_transformers) + ) + + for transformer_num, transformer_id, transformer, voter_data_idx in ensemble: + self._append_transformer( + transformer_id, transformer ) - voter_data_idx = np.delete(transformer_voter_data_idx, transformer_data_idx) self._append_voter_data_idx( task_id=transformer_id, bag_id=transformer_num, @@ -529,11 +623,13 @@ def add_transformer( # train voters and deciders from new transformer to previous tasks for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): - self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) + self.set_voter(transformer_id=transformer_id, + task_id=existing_task_id) self.set_decider( task_id=existing_task_id, transformer_ids=list( - self.task_id_to_transformer_id_to_voters[existing_task_id].keys() + self.task_id_to_transformer_id_to_voters[existing_task_id].keys( + ) ), ) @@ -680,7 +776,8 @@ def add_task( if num_transformers == 0: transformer_ids = forward_transformer_ids else: - transformer_ids = np.concatenate([forward_transformer_ids, task_id]) + transformer_ids = np.concatenate( + [forward_transformer_ids, task_id]) else: transformer_ids = self.get_transformer_ids() self.set_decider( diff --git a/proglearn/tests/test_forest.py b/proglearn/tests/test_forest.py index acf7e9976b..7030435e40 100644 --- a/proglearn/tests/test_forest.py +++ b/proglearn/tests/test_forest.py @@ -1,12 +1,15 @@ -import unittest -import pytest -import numpy as np -import random - -from proglearn.forest import LifelongClassificationForest -from proglearn.transformers import TreeClassificationTransformer -from proglearn.voters import TreeClassificationVoter from proglearn.deciders import SimpleArgmaxAverage +from proglearn.voters import TreeClassificationVoter +from proglearn.transformers import TreeClassificationTransformer +from proglearn.forest import 
LifelongClassificationForest, UncertaintyForest +import random +import numpy as np +import time +import pytest +import unittest class TestLifelongClassificationForest: @@ -47,3 +50,98 @@ def test_correct_default_n_estimators(self): def test_correct_true_initilization_finite_sample_correction(self): l2f = LifelongClassificationForest(default_kappa=np.inf) assert l2f.pl_.default_voter_kwargs == {"kappa": np.inf} + + +# Test Uncertainty Forest + + +def test_uf_accuracy(): + uf = UncertaintyForest() + X = np.ones((20, 4)) + X[10:] *= -1 + y = [0] * 10 + [1] * 10 + uf = uf.fit(X, y) + np.testing.assert_array_equal(uf.predict(X), y) + + +@pytest.mark.parametrize("max_depth", [1, None]) +@pytest.mark.parametrize("max_features", [2, 0.5, "auto", "sqrt", "log2"]) +@pytest.mark.parametrize("poisson_sampler", [False, True]) +def test_decision_tree_params(max_depth, max_features, poisson_sampler): + uf = UncertaintyForest( + max_depth=max_depth, max_features=max_features, poisson_sampler=poisson_sampler + ) + X = np.ones((12, 20)) + X[6:] *= -1 + y = [0] * 6 + [1] * 6 + uf = uf.fit(X, y) + + assert uf.n_estimators == len(uf.estimators_) + depths = [est.max_depth for est in uf.estimators_] + assert all(np.asarray(depths) == max_depth) + + features = [est.max_features for est in uf.estimators_] + if poisson_sampler: + assert not all(np.asarray(features) == features[0]) + else: + assert all(np.asarray(features) == max_features) + + +def test_parallel_trees(): + uf = UncertaintyForest(n_estimators=100, n_jobs=1, + max_features=1.0, tree_construction_proportion=0.5) + uf_parallel = UncertaintyForest( + n_estimators=100, n_jobs=10, max_features=1.0, tree_construction_proportion=0.5) + X = np.random.normal(0, 1, (1000, 100)) + y = [0, 1] * (len(X) // 2) + + time_start = time.time() + uf.fit(X, y) + time_diff = time.time() - time_start + + time_start = time.time() + uf_parallel.fit(X, y) + time_parallel_diff = time.time() - time_start + + assert time_parallel_diff / time_diff < 0.9 + + +def test_max_samples(): + max_samples_list = [8, 0.5, None] + depths = [] + X = np.random.normal(0, 1, (100, 2)) + X[:50] *= -1 + y = [0, 1] * 50 + for ms in max_samples_list: + uf = UncertaintyForest(n_estimators=1, max_samples=ms) + uf = uf.fit(X, y) + depths.append(uf.estimators_[0].get_depth()) + + assert all(np.diff(depths) > 0) + +@pytest.mark.parametrize("honest_prior", ["empirical", "uniform", "ignore"]) +def test_honest_prior(honest_prior): + X = np.random.normal(0, 1, (60, 2)) + X[:30] *= -1 + y = [0, 0, 1] * 20 + uf = UncertaintyForest(n_estimators=5, honest_prior=honest_prior) + uf = uf.fit(X, y) + if honest_prior == 'uniform': + assert all([len(set(voter.prior_posterior_)) == 1 for voter in uf.voters_]) + elif honest_prior in ('ignore', 'empirical'): + assert all([np.all(np.diff(voter.prior_posterior_) < 0) for voter in uf.voters_]) + + +@pytest.mark.parametrize("honest_prior", ["empirical", "uniform", "ignore"]) +def test_empty_leaves(honest_prior): + np.random.seed(0) + X = np.random.normal(0, 1, (100, 2)) + y = [0]*75 + [1]*25 + uf = UncertaintyForest(n_estimators=1, honest_prior=honest_prior, tree_construction_proportion=0.96, kappa=np.inf) + uf = uf.fit(X, y) + + y_proba = uf.predict_proba(X) + if honest_prior == 'uniform': + assert len(np.where(y_proba[:, 0] == 0.5)[0]) > 50 + elif honest_prior in ('ignore', 'empirical'): + assert len(np.where(y_proba[:, 0] == 0.75)[0]) > 50 diff --git a/proglearn/transformers.py b/proglearn/transformers.py index
6af4d5fe8b..e9f42f1ead 100644 --- a/proglearn/transformers.py +++ b/proglearn/transformers.py @@ -2,6 +2,7 @@ Main Author: Will LeVine Corresponding Email: levinewill@icloud.com """ +import warnings import keras import numpy as np from sklearn.tree import DecisionTreeClassifier @@ -133,17 +134,53 @@ class TreeClassificationTransformer(BaseTransformer): Parameters ---------- - kwargs : dict, default={} - A dictionary to contain parameters of the tree. + max_features : {"auto", "sqrt", "log2"}, int or float, default=1.0 + The number of features to consider when looking for the best split: + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a fraction and + `round(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires + effectively inspecting more than ``max_features`` features. + + poisson_sampler : boolean, default=False + If True, to match the GRF theory [#1grf]_, the number of features + considered by each tree is drawn from a Poisson distribution with + mean equal to `max_features`. + + sample_weight : array-like of shape (n_samples,), default=None + If None, all samples are weighted equally. Enables efficient + parallelization by giving non-sampled rows of X and y a weight of 0. + + fit_kwargs : dict, default={} + Named arguments passed to the sklearn.tree.DecisionTreeClassifier tree + created during `fit`. Attributes ---------- - transformer : sklearn.tree.DecisionTreeClassifier - an internal sklearn DecisionTreeClassifier + transformer_ : sklearn.tree.DecisionTreeClassifier + an internal sklearn.tree.DecisionTreeClassifier. + + n_features_ : int + The number of features of the data fitted. + + References + ---------- + .. [#1grf] Athey, Susan, Julie Tibshirani and Stefan Wager. + "Generalized Random Forests", Annals of Statistics, 2019. """ - def __init__(self, kwargs={}): - self.kwargs = kwargs + def __init__(self, max_features=1.0, poisson_sampler=False, sample_weight=None, fit_kwargs={}): + self.max_features = max_features + self.poisson_sampler = poisson_sampler + self.sample_weight = sample_weight + self.fit_kwargs = fit_kwargs def fit(self, X, y): """ @@ -162,7 +199,34 @@ def fit(self, X, y): The object itself.
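+
+        Examples
+        --------
+        An illustrative sketch of fitting with Poisson feature sampling
+        (data values are arbitrary)::
+
+            X = np.random.normal(0, 1, (100, 10))
+            y = [0, 1] * 50
+            t = TreeClassificationTransformer(max_features="sqrt",
+                                              poisson_sampler=True)
+            leaves = t.fit(X, y).transform(X)  # one leaf index per sample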
""" X, y = check_X_y(X, y) - self.transformer_ = DecisionTreeClassifier(**self.kwargs).fit(X, y) + self.n_features_ = X.shape[1] + self.prior_posterior_ = np.bincount(y) / len(y) + if self.poisson_sampler: + if self.max_features in ("auto", "sqrt"): + max_features = np.sqrt(self.n_features_) + elif self.max_features == "log2": + max_features = np.log2(self.n_features_) + elif isinstance(self.max_features, float): + assert self.max_features > 0, self.max_features + max_features = self.max_features * self.n_features_ + elif isinstance(self.max_features, int): + assert self.max_features > 0, self.max_features + max_features = self.max_features + else: + raise ValueError(f"max_features value not an accepted value") + if max_features > self.n_features_: + warnings.warn( + "max_features value led to poisson mean " + + "({max_features}) > the number of features" + ) + max_features = int(max_features) + max_features = min(max(np.random.poisson(max_features), 1), self.n_features_) + else: + max_features = self.max_features + + self.transformer_ = DecisionTreeClassifier( + max_features=max_features, **self.fit_kwargs + ).fit(X, y, sample_weight=self.sample_weight) return self def transform(self, X): diff --git a/proglearn/voters.py b/proglearn/voters.py index 838f4bfdf6..6908378640 100755 --- a/proglearn/voters.py +++ b/proglearn/voters.py @@ -27,21 +27,34 @@ class TreeClassificationVoter(BaseClassificationVoter): classes : list, default=[] list of all possible output label values + honest_prior : {"ignore", "uniform", "empirical"}, default="ignore" + Method for dealing with empty leaves during evaluation of a test + sample. If "ignore", trees in which the leaf is empty are not used in + the prediction. If "uniform", the prior tree posterior is 1/(number of + classes). If "empirical", the prior tree posterior is the relative + class frequency in the voting subsample. If all tree leaves are empty, + "ignore" will use the empirical prior and the others will use their + respective priors. + Attributes ---------- missing_label_indices_ : list a (potentially empty) list of label values that exist in the ``classes`` parameter but are missing in the latest ``fit`` function - call + call. - uniform_posterior_ : ndarray of shape (n_classes,) - the uniform posterior associated with the + prior_posterior_ : ndarray of shape (n_classes,) + The prior posterior associated with zero posteriors. + + num_fit_classes_ : int + Number of unique classes in the set of fitted labels. 
""" - def __init__(self, kappa=np.inf, classes=[]): + def __init__(self, kappa=np.inf, classes=[], honest_prior="ignore"): self.kappa = kappa self.classes = np.asarray(classes) + self.honest_prior = honest_prior def fit(self, X, y): """ @@ -49,8 +62,8 @@ def fit(self, X, y): Parameters ---------- - X : array of shape [n_samples, n_features] - the transformed input data + X : array of shape [n_samples,] + Leaf indices each sample falls into y : array of shape [n_samples] the class labels @@ -61,15 +74,22 @@ def fit(self, X, y): """ check_classification_targets(y) - num_fit_classes = len(np.unique(y)) + self.num_fit_classes_ = len(np.unique(y)) self.missing_label_indices_ = [] - if self.classes.size != 0 and num_fit_classes < len(self.classes): + if self.classes.size != 0 and self.num_fit_classes_ < len(self.classes): for idx, label in enumerate(self.classes): if label not in np.unique(y): self.missing_label_indices_.append(idx) - self.uniform_posterior_ = np.ones(num_fit_classes) / num_fit_classes + if self.honest_prior == "uniform": + self.prior_posterior_ = np.ones(self.num_fit_classes_) / self.num_fit_classes_ + elif self.honest_prior in ("empirical", "ignore"): + self.prior_posterior_ = np.bincount( + y, minlength=len(self.classes)) / len(y) + else: + raise ValueError("honest_prior must be in " + + "{'ignore', 'uniform', 'empirical'}") self.leaf_to_posterior_ = {} @@ -92,8 +112,8 @@ def predict_proba(self, X): Parameters ---------- - X : array of shape [n_samples, n_features] - the transformed input data + X : array of shape [n_samples,] + Indices of the leaf each sample falls into Returns ------- @@ -110,8 +130,10 @@ def predict_proba(self, X): for x in X: if x in list(self.leaf_to_posterior_.keys()): votes_per_example.append(self.leaf_to_posterior_[x]) + elif self.honest_prior == "ignore": + votes_per_example.append(np.zeros(self.num_fit_classes_)) else: - votes_per_example.append(self.uniform_posterior_) + votes_per_example.append(self.prior_posterior_) votes_per_example = np.array(votes_per_example) @@ -162,10 +184,12 @@ def _finite_sample_correction(self, posteriors, num_points_in_partition, kappa): y_proba_hat : ndarray of shape [n_samples, n_classes] posteriors per example """ - correction_constant = 1 / (kappa * num_points_in_partition) + # correction_constant = 1 / (kappa * num_points_in_partition) + + # zero_posterior_idxs = np.where(posteriors == 0)[0] + # posteriors[zero_posterior_idxs] = correction_constant - zero_posterior_idxs = np.where(posteriors == 0)[0] - posteriors[zero_posterior_idxs] = correction_constant + posteriors += self.prior_posterior_ / (kappa * num_points_in_partition) posteriors /= sum(posteriors)