Prototype example #1
base: dev-0.1
Changes from 30 commits
@@ -0,0 +1,6 @@
```
# images and anything under mlflow
*.png
examples/example_train*/*

# pycache
*.pyc
```
@@ -0,0 +1,62 @@
```python
from abc import ABC


class AbstractCallback(ABC):
    """
    Abstract class for callbacks in the training process.
    Callbacks can be used to plot intermediate metrics, log contents, save checkpoints, etc.
    """

    def __init__(self, name: str):
        """
        :param name: Name of the callback.
        """
        self._name = name
        self._trainer = None

    @property
    def name(self):
        """
        Getter for callback name
        """
        return self._name

    @property
    def trainer(self):
        """
        Allows for access of trainer
        """
        return self._trainer

    def _set_trainer(self, trainer):
        """
        Helper function called by trainer class to initialize trainer value field

        :param trainer: trainer object
        :type trainer: AbstractTrainer or subclass
        """
        self._trainer = trainer

    def on_train_start(self):
        """
        Called at the start of training.
        """
        pass

    def on_epoch_start(self):
        """
        Called at the start of each epoch.
        """
        pass

    def on_epoch_end(self):
        """
        Called at the end of each epoch.
        """
        pass

    def on_train_end(self):
        """
        Called at the end of training.
        """
        pass
```
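To illustrate how these hooks compose, here is a minimal, hypothetical sketch of a trainer driving the callback lifecycle. The `ToyTrainer` and `PrintCallback` names are illustrative only and do not exist in the repository; the callback interface is a trimmed copy of the one above so the sketch is self-contained.

```python
from abc import ABC


class AbstractCallback(ABC):
    """Trimmed copy of the callback interface above, for a runnable sketch."""

    def __init__(self, name: str):
        self._name = name
        self._trainer = None

    def _set_trainer(self, trainer):
        self._trainer = trainer

    def on_train_start(self): pass
    def on_epoch_start(self): pass
    def on_epoch_end(self): pass
    def on_train_end(self): pass


class PrintCallback(AbstractCallback):
    """Example subclass that records which hooks fired, in order."""

    def __init__(self, name: str):
        super().__init__(name)
        self.events = []

    def on_train_start(self):
        self.events.append("train_start")

    def on_epoch_end(self):
        self.events.append(f"epoch_{self._trainer.epoch}_end")


class ToyTrainer:
    """Hypothetical trainer skeleton: wires callbacks to itself and fires hooks."""

    def __init__(self, callbacks):
        self.epoch = 0
        self.callbacks = callbacks
        for cb in callbacks:
            cb._set_trainer(self)

    def train(self, n_epochs: int):
        for cb in self.callbacks:
            cb.on_train_start()
        for self.epoch in range(n_epochs):
            for cb in self.callbacks:
                cb.on_epoch_start()
            # ... forward/backward passes would go here ...
            for cb in self.callbacks:
                cb.on_epoch_end()
        for cb in self.callbacks:
            cb.on_train_end()


cb = PrintCallback("printer")
ToyTrainer([cb]).train(2)
print(cb.events)  # ['train_start', 'epoch_0_end', 'epoch_1_end']
```

Subclasses override only the hooks they need; the base class provides no-op defaults for the rest.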
> **Reviewer:** Do you plan to include multiple plotting classes in the same file? If not, consider changing the name of this file to match the class name, since this class is specific to plotting patches. I could also see a scenario where the plotting callback classes are in a separate folder inside the …

> **Author (wli51):** I am working on a major overhaul of the evaluation and plotting suite outside of this PR that would have separate plotting helper functions for ImageDataset vs PatchDataset, living in a single .py file under a single evaluation folder; a single IntermediatePlot callback class would determine which to use depending on the dataset it was given. I can merge the overhaul into this PR once I am done with it, or it could be a separate PR. What are your thoughts?

> **Reviewer:** That seems like a good idea to me, even if many of the helper functions are only for one callback. This is a design decision, which will depend on what is being plotted and how much complexity is involved. I think a separate PR would be a better choice for a change like this.

> **Author (wli51):** Just did with the push last night. The new plot files under visualization_utils detect the dataset type: for a standard image dataset (ImageDataset and CachedDataset) they plot three columns showing the input, target, and predicted image; for PatchDataset they plot an additional left-most column showing the raw image the patches were cropped from. There are also two versions of the plot function: one operates on existing inference and evaluation results (predictions from a previous forward pass and the corresponding computed metrics), while the other computes inference and evaluation internally so that everything is self-contained. For now the self-contained version is used by the IntermediatePlot callback class for speed of implementation, which results in redundant inference and evaluation of n images per epoch, where n is the number of patches/images being plotted. In a future version we can rework how the trainer communicates with the callbacks to remove this redundant computation.
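The dataset-type dispatch described above could look roughly like the following sketch. The class and function names here are stand-ins, not the repository's actual `ImageDataset`/`PatchDataset` implementations; only the column layout follows the description.

```python
# Hypothetical sketch of the dispatch described above: a single plotting
# entry point chooses the column layout based on the dataset type.

class ImageDataset: ...
class CachedDataset(ImageDataset): ...
class PatchDataset: ...


def plot_columns_for(dataset):
    """Return the column titles a plot helper would render for this dataset."""
    if isinstance(dataset, PatchDataset):
        # Extra left-most column: the raw image the patches were cropped from
        return ["raw", "input", "target", "predicted"]
    if isinstance(dataset, ImageDataset):
        # Covers both ImageDataset and CachedDataset
        return ["input", "target", "predicted"]
    raise TypeError(f"Unsupported dataset type: {type(dataset)}")


print(plot_columns_for(PatchDataset()))   # ['raw', 'input', 'target', 'predicted']
print(plot_columns_for(CachedDataset()))  # ['input', 'target', 'predicted']
```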
@@ -0,0 +1,69 @@
```python
from typing import List, Union

import torch
import torch.nn as nn

from .AbstractCallback import AbstractCallback
from ..datasets.PatchDataset import PatchDataset

from ..evaluation.visualization_utils import plot_patches


class IntermediatePatchPlot(AbstractCallback):
    """
    Callback to plot model generated outputs alongside ground
    truth and input at the end of each epoch.
    """

    def __init__(self,
                 name: str,
                 path: str,
                 dataset: PatchDataset,
                 plot_n_patches: int = 5,
                 plot_metrics: List[nn.Module] = None,
                 **kwargs):
        """
        Initialize the IntermediatePlot callback.

        :param name: Name of the callback.
        :type name: str
        :param path: Path to save the model weights.
        :type path: str
        :param dataset: Dataset to be used for plotting intermediate results.
        :type dataset: PatchDataset
        :param plot_n_patches: Number of patches to plot, defaults to 5.
        :type plot_n_patches: int, optional
        :param plot_metrics: List of metrics to compute and display in plot title, defaults to None.
        :type plot_metrics: List[nn.Module], optional
        :param kwargs: Additional keyword arguments to be passed to plot_patches.
        :type kwargs: dict
        :raises TypeError: If the dataset is not an instance of PatchDataset.
        """
        super().__init__(name)
        self._path = path
        if not isinstance(dataset, PatchDataset):
            raise TypeError(f"Expected PatchDataset, got {type(dataset)}")
        self._dataset = dataset

        # Additional kwargs passed to plot_patches
        self.plot_n_patches = plot_n_patches
        self.plot_metrics = plot_metrics
        self.plot_kwargs = kwargs

    def on_epoch_end(self):
        """
        Called at the end of each epoch.

        Plot dataset with model predictions on n random images from dataset at the end of each epoch.
        """
        original_device = next(self.trainer.model.parameters()).device

        plot_patches(
            _dataset=self._dataset,
            _n_patches=self.plot_n_patches,
            _model=self.trainer.model,
            _metrics=self.plot_metrics,
            save_path=f"{self._path}/epoch_{self.trainer.epoch}.png",
            device=original_device,
            **self.plot_kwargs
        )
```

> **Reviewer** (on the dataset type check): This is a good idea. Consider adding more input validation. In particular I think it would be worth including input validation to check that the user enters correct inputs (such as a positive int of epochs and update frequency). You could include this functionality in a place like the …
> **Reviewer:** Maybe I missed this in the code. I think we want to also save the images after each epoch as artifacts with mlflow. I do this with cropped nuclei by reserving a folder for each nucleus, where the images for each epoch are saved in that folder.

> **Reviewer:** There are a few ways you could go about doing this (e.g. including more code in this logger or creating classes for each type of data saved). By types of data I mean: metrics, parameters, artifacts, etc.

> **Reviewer:** Optuna also uses algorithms to understand hyperparameter importances, so I think we will also want to save the Optuna object as an artifact for optimizing models.

> **Author (wli51):** In my current implementation, the mlflow logger, image plotting, and Optuna do not go hand in hand. I am less familiar with mlflow than you are, and what I think you are proposing makes a lot more sense and improves the flow. I do think this amount of extra functionality is perhaps more suitable for a separate PR.

> **Reviewer:** I think so too. If you have questions, just let me know.
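The per-sample artifact layout the reviewer describes could be sketched as below. `mlflow.log_artifact(local_path, artifact_path=...)` is a real MLflow API; the folder layout, the `artifact_destination` helper, and the `sample_` naming are assumptions for illustration, not the repository's code.

```python
# Hedged sketch: log each epoch's plot as an MLflow artifact, grouped under a
# per-sample folder (mirroring the per-nucleus layout the reviewer describes).
import os


def artifact_destination(sample_id: int, epoch: int):
    """Return (local_filename, artifact_path) for one sample's epoch plot."""
    local_file = f"epoch_{epoch}.png"
    # One artifact folder per sample, so successive epochs accumulate together
    artifact_path = os.path.join("plots", f"sample_{sample_id}")
    return local_file, artifact_path


# Inside on_epoch_end one would then call, for each plotted sample i:
#   local_file, artifact_path = artifact_destination(i, self.trainer.epoch)
#   mlflow.log_artifact(local_file, artifact_path=artifact_path)

print(artifact_destination(3, 7))
```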
@@ -0,0 +1,93 @@
```python
import os
import pathlib
import tempfile
from typing import Union

import mlflow
import torch

from .AbstractCallback import AbstractCallback


class MlflowLogger(AbstractCallback):
    """
    Callback to log metrics to MLflow.
    """

    def __init__(self,
                 name: str,
                 artifact_name: str = 'best_model_weights.pth',
                 mlflow_uri: Union[pathlib.Path, str] = 'mlruns',
                 mlflow_experiment_name: str = 'Default',
                 mlflow_start_run_args: dict = {},
                 mlflow_log_params_args: dict = {},
                 ):
        """
        Initialize the MlflowLogger callback.

        :param name: Name of the callback.
        :type name: str
        :param artifact_name: Name of the artifact file to log, defaults to 'best_model_weights.pth'.
        :type artifact_name: str, optional
        :param mlflow_uri: URI for the MLflow tracking server, defaults to 'mlruns' under current wd.
        :type mlflow_uri: pathlib.Path or str, optional
        :param mlflow_experiment_name: Name of the MLflow experiment, defaults to 'Default'.
        :type mlflow_experiment_name: str, optional
        :param mlflow_start_run_args: Additional arguments for starting an MLflow run, defaults to {}.
        :type mlflow_start_run_args: dict, optional
        :param mlflow_log_params_args: Additional arguments for logging parameters to MLflow, defaults to {}.
        :type mlflow_log_params_args: dict, optional
        """
        super().__init__(name)

        try:
            mlflow.set_tracking_uri(mlflow_uri)
            mlflow.set_experiment(mlflow_experiment_name)
        except Exception as e:
            print(f"Error setting MLflow tracking URI: {e}")

        self._artifact_name = artifact_name
        self._mlflow_start_run_args = mlflow_start_run_args
        self._mlflow_log_params_args = mlflow_log_params_args

    def on_train_start(self):
        """
        Called at the start of training.

        Calls mlflow start run and logs params if provided.
        """
        mlflow.start_run(**self._mlflow_start_run_args)
        mlflow.log_params(self._mlflow_log_params_args)

    def on_epoch_end(self):
        """
        Called at the end of each epoch.

        Iterates over the most recent log items in the trainer and calls mlflow log metric.
        """
        for key, values in self.trainer.log.items():
            if values is not None and len(values) > 0:
                value = values[-1]
            else:
                value = None
            mlflow.log_metric(key, value, step=self.trainer.epoch)

    def on_train_end(self):
        """
        Called at the end of training.

        Saves the trainer's best model to a temporary directory and calls mlflow log artifact,
        then ends the run.
        """
        # Save weights to a temporary directory and log artifacts
        with tempfile.TemporaryDirectory() as tmpdirname:
            weights_path = os.path.join(tmpdirname, self._artifact_name)
            torch.save(self.trainer.best_model, weights_path)
            mlflow.log_artifact(weights_path, artifact_path="models")

        mlflow.end_run()
```
> **Reviewer:** It may be useful to include more info in these readmes in the future to further guide the user.
@@ -0,0 +1,3 @@
```
Here lives the callback classes that are meant to be fed into trainers to do stuff like saving images every epoch and logging.

The callback classes must inherit the abstract class.
```
@@ -0,0 +1,39 @@
```yaml
name: cp_gan_env
channels:
  - anaconda
  - pytorch
  - nvidia
  - conda-forge
dependencies:
  - conda-forge::python=3.9
  - conda-forge::pip
  - pytorch::pytorch
  - pytorch::torchvision
  - pytorch::torchaudio
  - pytorch::pytorch-cuda=12.1
  - conda-forge::seaborn
  - conda-forge::matplotlib
  - conda-forge::jupyter
  - conda-forge::pre_commit
  - conda-forge::pandas
  - conda-forge::pillow
  - conda-forge::numpy
  - conda-forge::pathlib2
  - conda-forge::scikit-learn
  - conda-forge::opencv
  - conda-forge::pyarrow
  - conda-forge::ipython
  - conda-forge::notebook
  - conda-forge::albumentations
  - conda-forge::optuna
  - conda-forge::mysqlclient
  - conda-forge::openjdk
  - conda-forge::gtk2
  - conda-forge::typing-extensions
  - conda-forge::Jinja2
  - conda-forge::inflect
  - conda-forge::wxpython
  - conda-forge::sentry-sdk
  - pip:
      - mlflow
      - cellprofiler==4.2.8
```
> **Reviewer:** Would recommend either consistently using type hints or using the function docstring for types. I think type hints are generally better, though.

> **Author (wli51):** I purposefully did not include type hints for the trainer objects here because the trainer classes already have type hints for the callback classes, and adding trainer type hints here in the callback class would cause circular imports. This is probably a consequence of my sub-optimal class design, which needs to be solved with some refactoring.

> **Reviewer:** I don't remember if this is the way to do it exactly, but you can do something like this: [code snippet did not load]

> **Reviewer:** Although, maybe there is a better alternative here, such as changing the design.
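The reviewer's snippet did not survive the page scrape, but the standard Python pattern for type-hinting across a circular import is a `typing.TYPE_CHECKING` guard with a string annotation: the import runs only under static analysis, never at runtime. This is a general-language sketch, not necessarily the exact snippet the reviewer posted, and the `trainers.AbstractTrainer` module path is hypothetical.

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only by static type checkers (mypy, pyright, IDEs);
    # at runtime this block is skipped, so no circular import occurs.
    from trainers.AbstractTrainer import AbstractTrainer  # hypothetical path


class AbstractCallback:
    def _set_trainer(self, trainer: "AbstractTrainer") -> None:
        # The string annotation is resolved lazily, so the name need not
        # exist at runtime; type checkers still verify call sites.
        self._trainer = trainer


cb = AbstractCallback()
cb._set_trainer(object())  # annotations are not enforced at runtime
print(cb._trainer is not None)  # True
```

An alternative, as the reviewer notes, is restructuring so the dependency only points one way, e.g. moving shared interfaces into a module both packages can import.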