Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ python src/eval.py --config-name=eval.yaml \
- `--config-name=eval.yaml`- sets task to be [`configs/eval.yaml`](../configs/eval.yaml)
- `experiment=eval/tofu/default`- set experiment to use [`configs/eval/tofu/default.yaml`](../configs/eval/tofu/default.yaml)
- `model=Llama-3.2-3B-Instruct`- override the default (`Llama-3.2-1B-Instruct`) model config to use [`configs/model/Llama-3.2-3B-Instruct`](../configs/model/Llama-3.2-3B-Instruct.yaml).
- Output directory: constructed as `saves/eval/SAMPLE_EVAL`


Run the MUSE-Books benchmark evaluation on a checkpoint of a Phi-3.5 model:
Expand Down
3 changes: 3 additions & 0 deletions docs/experiments.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ The large number of component variants supported in this repository creates the

At the core, three main Hydra configs—`train.yaml` (generic training), `eval.yaml` (running evaluation), and `unlearn.yaml` (unlearning training)—provide the base configuration for the main types of experiments. These are then extended by experiment-specific configs and command-line overrides. We set up experiment configs for common use cases, like LLaMA-2 unlearning on TOFU, LLaMA-2 evaluation on MUSE, etc., which set the required datasets, models, and base train and eval configs to make things easier.

Experiment output directories are constructed based on the task mode (`train` / `eval` / `unlearn`) and the task name (provided by the user) as `./saves/${mode}/${task_name}`. The experiment logging will display where the model checkpoints, logs and evaluation dumps are stored.

---

### Table of Contents
Expand All @@ -34,6 +36,7 @@ At the core, three main Hydra configs—`train.yaml` (generic training), `eval.y
python src/train.py --config-name=train.yaml experiment=finetune/tofu/default task_name=SAMPLE_TRAIN

## runs an unlearning training using experiment details from configs/unlearn/tofu/default.yaml
# output directory will be constructed as: saves/unlearn/SAMPLE_UNLEARN
python src/train.py --config-name=unlearn.yaml experiment=unlearn/tofu/default task_name=SAMPLE_UNLEARN


Expand Down
12 changes: 9 additions & 3 deletions src/evals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ def __init__(self, name, eval_cfg, **kwargs):
self.eval_cfg = eval_cfg
self.metrics_cfg = self.eval_cfg.metrics
self.metrics = self.load_metrics(self.metrics_cfg)
logger.info(f"Output directory {self.eval_cfg.output_dir}")
logger.info(
f"Evaluations stored in the experiment directory: {self.eval_cfg.output_dir}"
)

def get_logs_file_path(self, output_dir, suffix="EVAL"):
"""Returns the path to json file to store results"""
Expand Down Expand Up @@ -71,10 +73,14 @@ def evaluate(self, model, output_dir=None, overwrite=None, **kwargs):
logs_file_path = self.get_logs_file_path(output_dir)
summary_file_path = self.get_logs_file_path(output_dir, suffix="SUMMARY")

# Load exisiting results from file if any.
# Load existing results from file if any.
logs = self.load_logs_from_file(logs_file_path) if not overwrite else {}

logger.info(f"***** Running {self.name} evaluation suite *****")
logger.info(f"Fine-grained evaluations will be saved to: {logs_file_path}")
logger.info(
f"Aggregated evaluations will be summarised in: {summary_file_path}"
)
for metric_name, metric_fn in self.metrics.items():
if not overwrite and metric_name in logs and logs[metric_name]:
logger.info(f"Skipping {metric_name}, already evaluated.")
Expand All @@ -100,7 +106,7 @@ def evaluate(self, model, output_dir=None, overwrite=None, **kwargs):
)
if "agg_value" in result:
logger.info(f"Result for metric {metric_name}:\t{result['agg_value']}")

self.save_logs(logs, logs_file_path)
self.save_logs(self.summarize(logs), summary_file_path)

return logs
7 changes: 7 additions & 0 deletions src/trainer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
from trainer.unlearn.simnpo import SimNPO
from trainer.unlearn.rmu import RMU

import logging

logger = logging.getLogger(__name__)

TRAINER_REGISTRY: Dict[str, Any] = {}


Expand Down Expand Up @@ -67,6 +71,9 @@ def load_trainer(
template_args=template_args,
**method_args,
)
logger.info(
f"{trainer_handler_name} Trainer loaded, output_dir: {trainer_args.output_dir}"
)
return trainer, trainer_args


Expand Down