Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ python src/eval.py --config-name=eval.yaml \
- `--config-name=eval.yaml`- sets task to be [`configs/eval.yaml`](../configs/eval.yaml)
- `experiment=eval/tofu/default`- set experiment to use [`configs/eval/tofu/default.yaml`](../configs/eval/tofu/default.yaml)
- `model=Llama-3.2-3B-Instruct`- override the default (`Llama-3.2-1B-Instruct`) model config to use [`configs/model/Llama-3.2-3B-Instruct`](../configs/model/Llama-3.2-3B-Instruct.yaml).
- Output directory: constructed as `saves/eval/SAMPLE_EVAL`


Run the MUSE-Books benchmark evaluation on a checkpoint of a Phi-3.5 model:
Expand Down
3 changes: 3 additions & 0 deletions docs/experiments.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ The large number of component variants supported in this repository creates the

At the core, three main Hydra configs—`train.yaml` (generic training), `eval.yaml` (running evaluation), and `unlearn.yaml` (unlearning training)—provide the base configuration for the main types of experiments. These are then extended by experiment-specific configs and command-line overrides. We set up experiment configs for common use cases, like LLaMA-2 unlearning on TOFU, LLaMA-2 evaluation on MUSE, etc., which set the required datasets, models, and base train and eval configs to make things easier.

Experiment output directories are constructed based on the task mode (`train` / `eval` / `unlearn`) and the task name (provided by the user) as `./saves/${mode}/${task_name}`. The experiment logging will display where the model checkpoints, logs and evaluation dumps are stored.

---

### Table of Contents
Expand All @@ -34,6 +36,7 @@ At the core, three main Hydra configs—`train.yaml` (generic training), `eval.y
python src/train.py --config-name=train.yaml experiment=finetune/tofu/default task_name=SAMPLE_TRAIN

## runs an unlearning training using experiment details from configs/unlearn/tofu/default.yaml
# output directory will be constructed as: saves/unlearn/SAMPLE_UNLEARN
python src/train.py --config-name=unlearn.yaml experiment=unlearn/tofu/default task_name=SAMPLE_UNLEARN


Expand Down
12 changes: 9 additions & 3 deletions src/evals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ def __init__(self, name, eval_cfg, **kwargs):
self.eval_cfg = eval_cfg
self.metrics_cfg = self.eval_cfg.metrics
self.metrics = self.load_metrics(self.metrics_cfg)
logger.info(f"Output directory {self.eval_cfg.output_dir}")
logger.info(
f"Evaluations stored in the experiment directory: {self.eval_cfg.output_dir}"
)

def get_logs_file_path(self, output_dir, suffix="EVAL"):
"""Returns the path to json file to store results"""
Expand Down Expand Up @@ -71,10 +73,14 @@ def evaluate(self, model, output_dir=None, overwrite=None, **kwargs):
logs_file_path = self.get_logs_file_path(output_dir)
summary_file_path = self.get_logs_file_path(output_dir, suffix="SUMMARY")

# Load exisiting results from file if any.
# Load existing results from file if any.
logs = self.load_logs_from_file(logs_file_path) if not overwrite else {}

logger.info(f"***** Running {self.name} evaluation suite *****")
logger.info(f"Fine-grained evaluations will be saved to: {logs_file_path}")
logger.info(
f"Aggregated evaluations will be summarised in: {summary_file_path}"
)
for metric_name, metric_fn in self.metrics.items():
if not overwrite and metric_name in logs and logs[metric_name]:
logger.info(f"Skipping {metric_name}, already evaluated.")
Expand All @@ -100,7 +106,7 @@ def evaluate(self, model, output_dir=None, overwrite=None, **kwargs):
)
if "agg_value" in result:
logger.info(f"Result for metric {metric_name}:\t{result['agg_value']}")

self.save_logs(logs, logs_file_path)
self.save_logs(self.summarize(logs), summary_file_path)

return logs
7 changes: 7 additions & 0 deletions src/trainer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
from trainer.unlearn.simnpo import SimNPO
from trainer.unlearn.rmu import RMU

import logging

logger = logging.getLogger(__name__)

TRAINER_REGISTRY: Dict[str, Any] = {}


Expand Down Expand Up @@ -67,6 +71,9 @@ def load_trainer(
template_args=template_args,
**method_args,
)
logger.info(
f"{trainer_handler_name} Trainer loaded, output_dir: {trainer_args.output_dir}"
)
return trainer, trainer_args


Expand Down