Merge pull request #230 from cognizant-ai-labs/model_ensembles

JamiesonWarner · web-flow · commit 107bb4ddd60a · 2023-08-11T10:15:29.000-05:00
Model ensembles
diff --git a/covid_xprize/examples/predictors/conditional_lstm/conditional_xprize_predictor.py b/covid_xprize/examples/predictors/conditional_lstm/conditional_xprize_predictor.py
@@ -51,7 +51,8 @@ def train(self,
               nb_training_geos: int = NB_TRAINING_DAYS,
               nb_testing_geos: int = NB_TESTING_GEOS,
               nb_trials: int = NUM_TRIALS,
-              nb_epochs: int = NUM_EPOCHS,) -> Union[Model, tuple[Model, dict]]:
+              nb_epochs: int = NUM_EPOCHS,
+              return_all_trials: bool = False) -> Union[Model, tuple[Model, dict]]:
         best_model, results_df = train_predictor(
             training_data=self.df,
             nb_lookback_days=NB_LOOKBACK_DAYS,
@@ -62,6 +63,7 @@ def train(self,
             nb_trials=nb_trials,
             nb_epochs=nb_epochs,
             lstm_size=LSTM_SIZE,
+            return_all_trials=return_all_trials
         )
         if return_results:
             return best_model, results_df
diff --git a/covid_xprize/examples/predictors/conditional_lstm/train_predictor.py b/covid_xprize/examples/predictors/conditional_lstm/train_predictor.py
@@ -40,6 +40,7 @@ def train_predictor(training_data: pd.DataFrame,
                     nb_trials: int,
                     nb_epochs: int,
                     lstm_size: int,
+                    return_all_trials: bool = False,
                     verbose = False) -> tuple[Model, pd.DataFrame]:
     """Trains a prediction model using the given hyperparameter arguments. 
     :param nb_lookback_days: This option is not fully implemented yet. Completing implementation
@@ -65,6 +66,7 @@ def train_predictor(training_data: pd.DataFrame,
     :param context_column: Which column in the data df to use as context and outcome.
     :param arch: Which predictor architecture to use.
         Current options are 'conditional' and 'independent'.
+    :param return_all_trials: If True, return the list of all trained models (one per trial) in place of the single best model.
     :param verbose: Verbosity level for model.fit() when training the predictor.
     :returns: (best_model, results_df)
     """
@@ -181,10 +183,13 @@ def train_predictor(training_data: pd.DataFrame,
         'test_loss': test_losses,
         'test_case_mae': test_case_maes})
 
-    # Select best model
-    print("Best test case mae:", np.min(test_case_maes))
-    best_model = models[np.argmin(test_case_maes)]
-    return best_model, results_df
+    if return_all_trials:
+        return models, results_df
+    else:
+        # Select best model
+        print("Best test case mae:", np.min(test_case_maes))
+        best_model = models[np.argmin(test_case_maes)]
+        return best_model, results_df
 
 
 # Shuffling data prior to train/val split
diff --git a/covid_xprize/examples/predictors/lstm/xprize_predictor.py b/covid_xprize/examples/predictors/lstm/xprize_predictor.py
@@ -214,7 +214,12 @@ def _convert_ratios_to_total_cases(self,
     def _smooth_case_list(case_list, window):
         return pd.Series(case_list).rolling(window).mean().to_numpy()
 
-    def train(self, num_trials=NUM_TRIALS, num_epochs=NUM_EPOCHS):
+    def train(self, num_trials=NUM_TRIALS, num_epochs=NUM_EPOCHS, return_all_trials=False):
+        """Trains the weights of the predictor model on a prediction loss.
+        :param num_trials: The number of LSTM models to train. The top performer is selected.
+        :param num_epochs: The number of iterations through the training data performed.
+        :param return_all_trials: If True, return the list of all trained models right after training, skipping model evaluation and winner selection.
+        """
         print("Creating numpy arrays for Keras for each country...")
         geos = self._most_affected_geos(self.df, MAX_NB_COUNTRIES, NB_LOOKBACK_DAYS)
         country_samples = create_country_samples(self.df, geos, CONTEXT_COLUMN, NB_TEST_DAYS, NB_LOOKBACK_DAYS)
@@ -277,6 +282,10 @@ def train(self, num_trials=NUM_TRIALS, num_epochs=NUM_EPOCHS):
             print('Val Loss:', val_loss)
             print('Test Loss:', test_loss)
 
+        if return_all_trials:
+            # Shortcut to avoid model evaluation & winner selection.
+            return models
+
         # Gather test info
         country_indeps = []
         country_predss = []
diff --git a/requirements.txt b/requirements.txt
@@ -5,8 +5,8 @@ numpy==1.24.2
 notebook==6.5.3
 scikit-learn==1.2.2
 scipy==1.10.1
-tensorflow==2.11.1
-keras==2.11.0
+tensorflow==2.13.0
+keras==2.13.1
 neat-python==0.92
 h5py==3.8.0
 
diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@
 import sys
 from setuptools import setup, find_packages
 
-LIBRARY_VERSION = '2.0.2'
+LIBRARY_VERSION = '2.0.3'
 
 CURRENT_PYTHON = sys.version_info[:2]
 REQUIRED_PYTHON = (3, 10)
@@ -61,14 +61,14 @@ def read(fname):
         ]
     },
     install_requires=[
-        'keras==2.11.0',
         'neat-python==0.92',
         'numpy==1.24.2',
         'pandas==1.5.3',
         'scikit-learn==1.2.2',
         'scipy==1.10.1',
         'setuptools==67.6.0',
-        'tensorflow==2.11.1',
+        'tensorflow==2.13.0',
+        'keras==2.13.1',
         'h5py==3.8.0'
     ],
     description='Contains sample code and notebooks '