Prototype example #1
base: dev-0.1
.gitignore (new file):

```gitignore
# images and anything under mlflow
*.png
examples/example_train*/*

# pycache
*.pyc
```
AbstractCallback.py (new file):

```python
from abc import ABC


class AbstractCallback(ABC):
    """
    Abstract class for callbacks in the training process.
    Callbacks can be used to plot intermediate metrics, log contents, save checkpoints, etc.
    """

    def __init__(self, name: str):
        """
        :param name: Name of the callback.
        """
        self._name = name
        self._trainer = None

    @property
    def name(self):
        """
        Getter for the callback name.
        """
        return self._name

    @property
    def trainer(self):
        """
        Allows access to the trainer.
        """
        return self._trainer

    def _set_trainer(self, trainer):
        """
        Helper method called by the trainer class to initialize the trainer field.

        :param trainer: trainer object
        :type trainer: AbstractTrainer or subclass
        """
        self._trainer = trainer

    def on_train_start(self):
        """
        Called at the start of training.
        """
        pass

    def on_epoch_start(self):
        """
        Called at the start of each epoch.
        """
        pass

    def on_epoch_end(self):
        """
        Called at the end of each epoch.
        """
        pass

    def on_train_end(self):
        """
        Called at the end of training.
        """
        pass
```
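To make the hook contract concrete, here is a minimal hypothetical subclass. The `trainer.log` and `trainer.epoch` attributes it reads are assumptions inferred from how `MlflowLogger` below uses the trainer, not a documented interface.

```python
from .AbstractCallback import AbstractCallback


class PrintLossCallback(AbstractCallback):
    """Hypothetical callback that prints the most recent logged loss each epoch."""

    def on_epoch_end(self):
        # Assumes trainer.log maps metric names to lists of per-epoch values,
        # as MlflowLogger.on_epoch_end below implies.
        losses = self.trainer.log.get("loss", [])
        if losses:
            print(f"epoch {self.trainer.epoch}: loss={losses[-1]:.4f}")
```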
Review thread:

Reviewer: Do you plan to include multiple plotting classes in the same file? If not, consider changing the name of this file to match the class name, since this class is specific to plotting patches. I could also see a scenario where the plotting callback classes live in a separate folder inside the …

Author: I am working on a major overhaul of the evaluation and plotting suite outside of this PR. It would have separate plotting helper functions for ImageDataset vs. PatchDataset that live in a single .py file under a single evaluation folder, and a single IntermediatePlot callback class would determine which to use depending on the dataset it was given. I can merge the overhaul into this PR once I am done with it, or it could be a separate PR. What are your thoughts?

Reviewer: That seems like a good idea to me, even if many of the helper functions are only for one callback. This is a design decision, which will depend on what is being plotted and how much complexity is involved. I think a separate PR would be a better choice for a change like this.

Author: Just did, with the push last night. The new plot files under visualization_utils detect the dataset type and plot three columns (input, target, and predicted image) for standard image datasets (ImageDataset and CachedDataset); for PatchDataset they plot an additional left-most column showing the raw image the patches are cropped from. There are also two versions of the plot function: one operates on existing inference and evaluation results (predictions from a previous forward pass and the corresponding computed metrics), while the other computes inference and evaluation internally, so everything is self-contained. For now the self-contained version is used by the IntermediatePlot callback class for quickness of implementation, which results in redundant evaluation and inference of n images per epoch, where n is the number of patches/images being plotted. In a future version we can rework how the trainer communicates with the callbacks to remove this redundant computation.
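As a rough illustration of the dataset-type dispatch described above (a hypothetical sketch; the stand-in PatchDataset and the column layout are taken from the discussion, not from the actual visualization_utils code):

```python
from torch.utils.data import Dataset


class PatchDataset(Dataset):
    """Hypothetical stand-in so the dispatch below is runnable in isolation."""
    def __len__(self):
        return 0


def grid_columns(dataset: Dataset) -> list:
    """Return the plot columns for a dataset, following the design described above."""
    if isinstance(dataset, PatchDataset):
        # PatchDataset gets an extra left-most column showing the raw
        # image the patches are cropped from.
        return ["raw image", "input", "target", "predicted"]
    # Standard image datasets (ImageDataset, CachedDataset, or any Dataset)
    return ["input", "target", "predicted"]
```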
IntermediatePlot callback (new file):

```python
import pathlib
from typing import List, Union
import random

import torch
import torch.nn as nn
from torch.utils.data import Dataset

from .AbstractCallback import AbstractCallback
from ..datasets.PatchDataset import PatchDataset
from ..evaluation.visualization_utils import plot_predictions_grid_from_model


class IntermediatePlot(AbstractCallback):
    """
    Callback to plot model-generated outputs, ground truth,
    and input stained image patches at the end of each epoch.
    """

    def __init__(self,
                 name: str,
                 path: Union[pathlib.Path, str],
                 dataset: Union[Dataset, PatchDataset],
                 plot_n_patches: int = 5,
                 indices: Union[List[int], None] = None,
                 plot_metrics: List[nn.Module] = None,
                 every_n_epochs: int = 5,
                 random_seed: int = 42,
                 **kwargs):
        """
        Initialize the IntermediatePlot callback.
        Allows plots of predictions to be generated during training to monitor training progress.
        Supports both PatchDataset and Dataset classes for plotting.
        When passed into the trainer, this callback plots the model predictions
        on a subset of the provided dataset at the end of each epoch.

        :param name: Name of the callback.
        :param path: Path to save the plots.
        :type path: Union[pathlib.Path, str]
        :param dataset: Dataset to be used for plotting intermediate results.
        :type dataset: Union[Dataset, PatchDataset]
        :param plot_n_patches: Number of patches to randomly select and plot, defaults to 5.
            The exact patches/images plotted may vary with the seed or dataset size.
            For best reproducibility and consistency, use a fixed dataset and the indices argument instead.
        :type plot_n_patches: int, optional
        :param indices: Optional list of specific indices to subset the dataset before inference.
            Overrides the plot_n_patches and random_seed arguments and uses the indices list to subset.
        :type indices: Union[List[int], None]
        :param plot_metrics: List of metrics to compute and display in the plot title, defaults to None.
        :type plot_metrics: List[nn.Module], optional
        :param every_n_epochs: How frequently intermediate plots should be generated, defaults to 5.
        :type every_n_epochs: int
        :param random_seed: Random seed for reproducible random patch/image selection, defaults to 42.
        :type random_seed: int
        :param kwargs: Additional keyword arguments passed to plot_predictions_grid_from_model.
        :type kwargs: dict
        :raises TypeError: If the dataset is not an instance of Dataset or PatchDataset.
        """
        super().__init__(name)
        self._path = path
        if not isinstance(dataset, (Dataset, PatchDataset)):
            raise TypeError(f"Expected Dataset or PatchDataset, got {type(dataset)}")

        self._dataset = dataset

        # Additional kwargs passed to plot_predictions_grid_from_model
        self.plot_metrics = plot_metrics
        self.every_n_epochs = every_n_epochs
        self.plot_kwargs = kwargs

        if indices is not None:
            # Check that indices are within bounds
            for i in indices:
                if i >= len(self._dataset):
                    raise ValueError(f"Index {i} out of bounds for dataset of size {len(self._dataset)}")
            self._dataset_subset_indices = indices
        else:
            # Generate random indices to subset, given the seed and plot_n_patches
            plot_n_patches = min(plot_n_patches, len(self._dataset))
            random.seed(random_seed)
            self._dataset_subset_indices = random.sample(range(len(self._dataset)), plot_n_patches)

    def on_epoch_end(self):
        """
        Called at the end of each epoch to plot predictions if the epoch is a multiple of `every_n_epochs`.
        """
        if (self.trainer.epoch + 1) % self.every_n_epochs == 0:
            self._plot()

    def on_train_end(self):
        """
        Called at the end of training. Plots if not already done in the last epoch.
        """
        if (self.trainer.epoch + 1) % self.every_n_epochs != 0:
            self._plot()

    def _plot(self):
        """
        Helper method to generate and save plots.
        Plots the dataset with model predictions on the selected subset of the dataset.
        Called by the on_epoch_end and on_train_end methods.
        """
        original_device = next(self.trainer.model.parameters()).device

        plot_predictions_grid_from_model(
            model=self.trainer.model,
            dataset=self._dataset,
            indices=self._dataset_subset_indices,
            metrics=self.plot_metrics,
            save_path=f"{self._path}/epoch_{self.trainer.epoch}.png",
            device=original_device,
            show=False,
            **self.plot_kwargs
        )
```
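A hedged usage sketch: the IntermediatePlot arguments come from the signature above, while `val_dataset`, `model`, and the `Trainer` API are assumptions for illustration.

```python
import torch.nn as nn

plot_callback = IntermediatePlot(
    name="intermediate_plot",
    path="plots",                  # PNGs are written as plots/epoch_<n>.png
    dataset=val_dataset,           # any Dataset or PatchDataset (assumed to exist)
    plot_n_patches=5,              # ignored when indices is provided
    plot_metrics=[nn.MSELoss()],   # shown in the plot titles
    every_n_epochs=10,
)

# Assumed trainer API; the trainer is expected to call on_epoch_end()
# and on_train_end() on each registered callback.
trainer = Trainer(model=model, callbacks=[plot_callback])
trainer.train()
```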
Review thread:

Reviewer: Maybe I missed this in the code, but I think we also want to save the images after each epoch as artifacts with mlflow. I do this with cropped nuclei by reserving a folder for each nucleus, where the images for each epoch are saved in that folder.

Reviewer: There are a few ways you could go about doing this (e.g. including more code in this logger, or creating classes for each type of data saved). By types of data I mean metrics, parameters, artifacts, etc.

Reviewer: Optuna also uses algorithms to understand hyperparameter importances, so I think we will also want to save the optuna object as an artifact when optimizing models.

Author: In my current implementation, the mlflow logger, image plotting, and optuna do not go hand in hand. I am less familiar with mlflow than you are, and what I think you are proposing makes a lot more sense and improves the flow. I do think this amount of extra functionality is perhaps more suitable for a separate PR.

Reviewer: I think so too. If you have questions, just let me know.
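For reference, a minimal sketch of the per-epoch artifact logging suggested above, assuming the plots are already written to local PNG files (the function name and folder layout are illustrative):

```python
import mlflow


def log_epoch_plot(png_path: str, image_id: str) -> None:
    """Hypothetical helper: store one epoch's plot under a per-image folder.

    mlflow keeps the original filename (e.g. epoch_3.png), so grouping by
    image_id yields one folder per image with all of its epochs inside,
    mirroring the reviewer's per-nucleus folder scheme.
    """
    mlflow.log_artifact(png_path, artifact_path=f"images/{image_id}")
```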
MlflowLogger callback (new file):

```python
import os
import pathlib
import tempfile
from typing import Union, Dict, Optional

import mlflow
import torch

from .AbstractCallback import AbstractCallback


class MlflowLogger(AbstractCallback):
    """
    Callback to log metrics to MLflow.
    """

    def __init__(self,
                 name: str,
                 artifact_name: str = 'best_model_weights.pth',
                 mlflow_uri: Union[pathlib.Path, str] = None,
                 mlflow_experiment_name: Optional[str] = None,
                 mlflow_start_run_args: dict = None,
                 mlflow_log_params_args: dict = None,
                 ):
        """
        Initialize the MlflowLogger callback.

        :param name: Name of the callback.
        :param artifact_name: Name of the artifact file to log, defaults to 'best_model_weights.pth'.
        :param mlflow_uri: URI for the MLflow tracking server, defaults to None.
            If a path is specified, the logger calls set_tracking_uri with that path,
            thereby initiating a new tracking server.
            If None (default), the logger does not touch the mlflow server, enabling
            logging to a global server initialized outside of this class.
        :type mlflow_uri: pathlib.Path or str, optional
        :param mlflow_experiment_name: Name of the MLflow experiment, defaults to None, in which
            case the set_experiment method of mlflow is not called and whichever experiment name
            is globally configured is used. If a name is provided, the logger calls
            set_experiment with that name.
        :type mlflow_experiment_name: str, optional
        :param mlflow_start_run_args: Additional arguments for starting an MLflow run, defaults to None.
        :type mlflow_start_run_args: dict, optional
        :param mlflow_log_params_args: Additional arguments for logging parameters to MLflow, defaults to None.
        :type mlflow_log_params_args: dict, optional
        """
        super().__init__(name)

        if mlflow_uri is not None:
            try:
                mlflow.set_tracking_uri(mlflow_uri)
            except Exception as e:
                raise RuntimeError(f"Error setting MLflow tracking URI: {e}")

        if mlflow_experiment_name is not None:
            try:
                mlflow.set_experiment(mlflow_experiment_name)
            except Exception as e:
                raise RuntimeError(f"Error setting MLflow experiment: {e}")

        self._artifact_name = artifact_name
        self._mlflow_start_run_args = mlflow_start_run_args
        self._mlflow_log_params_args = mlflow_log_params_args

    def on_train_start(self):
        """
        Called at the start of training.

        Calls mlflow start_run and logs params if provided.
        """
        if self._mlflow_start_run_args is None:
            pass
        elif isinstance(self._mlflow_start_run_args, dict):
            mlflow.start_run(**self._mlflow_start_run_args)
        else:
            raise TypeError("mlflow_start_run_args must be None or a dictionary.")

        if self._mlflow_log_params_args is None:
            pass
        elif isinstance(self._mlflow_log_params_args, dict):
            mlflow.log_params(self._mlflow_log_params_args)
        else:
            raise TypeError("mlflow_log_params_args must be None or a dictionary.")

    def on_epoch_end(self):
        """
        Called at the end of each epoch.

        Iterates over the most recent log items in the trainer and calls mlflow log_metric.
        """
        for key, values in self.trainer.log.items():
            if values is not None and len(values) > 0:
                mlflow.log_metric(key, values[-1], step=self.trainer.epoch)

    def on_train_end(self):
        """
        Called at the end of training.

        Saves the trainer's best model to a temporary directory, calls mlflow
        log_artifact, then ends the run.
        """
        # Save weights to a temporary directory and log artifacts
        with tempfile.TemporaryDirectory() as tmpdirname:
            weights_path = os.path.join(tmpdirname, self._artifact_name)
            torch.save(self.trainer.best_model, weights_path)
            mlflow.log_artifact(weights_path, artifact_path="models")

        mlflow.end_run()
```
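A hedged usage sketch: the MlflowLogger arguments below come from the class definition above, while the Trainer construction and train() call are assumptions about the surrounding trainer API.

```python
logger = MlflowLogger(
    name="mlflow_logger",
    artifact_name="best_model_weights.pth",
    mlflow_uri="mlruns",                          # local file-based tracking store
    mlflow_experiment_name="prototype_example",   # hypothetical experiment name
    mlflow_start_run_args={"run_name": "dev-0.1-prototype"},
    mlflow_log_params_args={"lr": 1e-3, "batch_size": 16},
)

# Assumed trainer API: metrics in trainer.log are logged per epoch,
# and trainer.best_model is saved as an artifact when training ends.
trainer = Trainer(model=model, callbacks=[logger])
trainer.train()
```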
Callbacks README (new file):

This folder contains the callback classes that are meant to be passed into trainers to do things like saving images every epoch and logging.

The callback classes must inherit from the abstract class.
Conda environment file (new file):

```yaml
name: cp_gan_env
channels:
  - anaconda
  - pytorch
  - nvidia
  - conda-forge
dependencies:
  - conda-forge::python=3.9
  - conda-forge::pip
  - pytorch::pytorch
  - pytorch::torchvision
  - pytorch::torchaudio
  - pytorch::pytorch-cuda=12.1
  - conda-forge::seaborn
  - conda-forge::matplotlib
  - conda-forge::jupyter
  - conda-forge::pre_commit
  - conda-forge::pandas
  - conda-forge::pillow
  - conda-forge::numpy
  - conda-forge::pathlib2
  - conda-forge::scikit-learn
  - conda-forge::opencv
  - conda-forge::pyarrow
  - conda-forge::ipython
  - conda-forge::notebook
  - conda-forge::albumentations
  - conda-forge::optuna
  - conda-forge::mysqlclient
  - conda-forge::openjdk
  - conda-forge::gtk2
  - conda-forge::typing-extensions
  - conda-forge::Jinja2
  - conda-forge::inflect
  - conda-forge::wxpython
  - conda-forge::sentry-sdk
  - pip:
      - mlflow
      - cellprofiler==4.2.8
```
Review thread:

Reviewer: I would recommend either consistently using type hints or using the function docstring for types. I think type hints are generally better, though.

Author: I purposefully did not include type hints for the trainer objects here because the trainer classes already have type hints referencing the callback classes, and adding a trainer type hint here in the callback class would cause circular imports. This is probably a consequence of my sub-optimal class design, which needs to be solved with some refactoring.

Reviewer: I don't remember if this is the way to do it exactly, but you can do something like this:
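A sketch of the pattern the reviewer is likely referring to: deferring the import behind typing.TYPE_CHECKING with postponed annotations, so the import is seen only during static analysis (the trainer module path here is an assumption, not the repo's actual layout):

```python
from __future__ import annotations  # annotations become lazy strings (PEP 563)

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers, never executed at runtime, so the
    # callbacks module no longer imports the trainer module at import time.
    from ..trainers.AbstractTrainer import AbstractTrainer


class ExampleCallback:
    def _set_trainer(self, trainer: AbstractTrainer) -> None:
        self._trainer = trainer
```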
Reviewer: Although maybe there is a better alternative here, such as changing the design.