From 54d3560daf1dc50a390c722e5175aab62333d1ca Mon Sep 17 00:00:00 2001 From: Anmol Mekala <49127549+molereddy@users.noreply.github.com> Date: Sat, 1 Mar 2025 09:13:50 -0500 Subject: [PATCH 1/9] Fix hyperlinks in README (#2) * testing commit * Fixes * cleanup --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index dce38e5..a04d486 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ We provide several variants for each of the components in the unlearning pipelin ## 📌 Table of Contents - 📖 [Overview](#-overview) -- 🗃️ [Available Components](#-available-components) +- 🗃️ [Available Components](#%EF%B8%8F-available-components) - ⚡ [Quickstart](#-quickstart) - 🛠️ [Environment Setup](#-environment-setup) - 💾 [Data Setup](#-data-setup) @@ -56,7 +56,7 @@ We provide several variants for each of the components in the unlearning pipelin - ➕ [How to Add New Components](#-how-to-add-new-components) - 📚 [Further Documentation](#-further-documentation) - 🔗 [Support & Contributors](#-support--contributors) -- 📝 [Citing this work](#-citating-this-work) +- 📝 [Citing this work](#-citing-this-work) - 🤝 [Acknowledgements](#-acknowledgements) - 📄 [License](#-license) @@ -198,7 +198,7 @@ If you use OpenUnlearning in your research, please cite: --- -### 🤝 Acknowledgments +### 🤝 Acknowledgements - This repo is inspired from [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory). - The [TOFU](https://github.com/locuslab/tofu) and [MUSE](https://github.com/jaechan-repo/muse_bench) benchmarks served as the foundation for our re-implementation. 
From 4c36e4f5a39d979280efd20452be7ba5ff54e40a Mon Sep 17 00:00:00 2001 From: Dornavineeth Date: Sun, 2 Mar 2025 18:37:19 +0000 Subject: [PATCH 2/9] Fixed DPO command --- scripts/tofu_unlearn.sh | 56 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/scripts/tofu_unlearn.sh b/scripts/tofu_unlearn.sh index a556bd1..eba38ff 100644 --- a/scripts/tofu_unlearn.sh +++ b/scripts/tofu_unlearn.sh @@ -9,12 +9,6 @@ models=( "Llama-3.2-3B-Instruct" "Llama-3.1-8B-Instruct" ) -trainers_experiments=( - "GradAscent unlearn/tofu/default.yaml" - "GradDiff unlearn/tofu/default.yaml" - "NPO unlearn/tofu/default.yaml" - "DPO unlearn/tofu/default.yaml" -) forget_retain_splits=( "forget01 retain99" "forget05 retain95" @@ -29,7 +23,57 @@ gradient_accumulation_steps=4 ########################################### Unlearn TOFU models ######################################################## ######################################################################################################################## +trainers_experiments=( + "GradAscent unlearn/tofu/default.yaml" + "GradDiff unlearn/tofu/default.yaml" + "NPO unlearn/tofu/default.yaml" +) +for split in "${forget_retain_splits[@]}"; do + forget_split=$(echo $split | cut -d' ' -f1) + retain_split=$(echo $split | cut -d' ' -f2) + for model in "${models[@]}"; do + for trainer_experiment in "${trainers_experiments[@]}"; do + trainer=$(echo $trainer_experiment | cut -d' ' -f1) + experiment=$(echo $trainer_experiment | cut -d' ' -f2) + + task_name=tofu_${model}_${forget_split}_${trainer} + model_path=open-unlearning/tofu_${model}_full + echo ${task_name}: Unlearning ${model_path} using ${trainer} + + # Unlearn + CUDA_VISIBLE_DEVICES=0,1 accelerate launch --config_file configs/accelerate/default_config.yaml --main_process_port $MASTER_PORT \ + src/train.py --config-name=unlearn.yaml \ + experiment=${experiment} \ + trainer=${trainer} \ + task_name=${task_name} \ + model=${model} \ + 
forget_split=${forget_split} \ + retain_split=${retain_split} \ + model.model_args.pretrained_model_name_or_path=${model_path} \ + retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json \ + trainer.args.per_device_train_batch_size=$per_device_train_batch_size \ + trainer.args.gradient_accumulation_steps=$gradient_accumulation_steps \ + trainer.args.ddp_find_unused_parameters=true \ + trainer.args.gradient_checkpointing=true + + # Eval + CUDA_VISIBLE_DEVICES=0 python src/eval.py \ + experiment=eval/tofu/default.yaml \ + forget_split=${forget_split} \ + model=${model} \ + task_name=${task_name} \ + model.model_args.pretrained_model_name_or_path=saves/unlearn/${task_name} \ + paths.output_dir=saves/unlearn/${task_name}/evals \ + retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json + done + done +done + + +trainers_experiments=( + "DPO unlearn/tofu/idk.yaml" +) for split in "${forget_retain_splits[@]}"; do forget_split=$(echo $split | cut -d' ' -f1) retain_split=$(echo $split | cut -d' ' -f2) From f7a69dee40cfe918e32250ce7c1ba7564205c9b4 Mon Sep 17 00:00:00 2001 From: Dornavineeth Date: Sun, 2 Mar 2025 18:50:47 +0000 Subject: [PATCH 3/9] download idk --- setup_data.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/setup_data.py b/setup_data.py index 48de0ad..358779c 100644 --- a/setup_data.py +++ b/setup_data.py @@ -1,8 +1,17 @@ from huggingface_hub import snapshot_download +# Setup retain model metrics snapshot_download( repo_id="open-unlearning/eval", allow_patterns="*.json", repo_type="dataset", local_dir="saves/eval", ) + +# Setup data +snapshot_download( + repo_id="open-unlearning/idk", + allow_patterns="*.jsonl", + repo_type="dataset", + local_dir="data", +) \ No newline at end of file From 332af36c4772eb1e836b767adbd102951693b60c Mon Sep 17 00:00:00 2001 From: Vineeth <48151992+Dornavineeth@users.noreply.github.com> Date: Sun, 2 Mar 2025 14:13:43 -0500 Subject: [PATCH 4/9] Revert "Dpo fix" --- 
scripts/tofu_unlearn.sh | 56 +++++------------------------------------ setup_data.py | 9 ------- 2 files changed, 6 insertions(+), 59 deletions(-) diff --git a/scripts/tofu_unlearn.sh b/scripts/tofu_unlearn.sh index eba38ff..a556bd1 100644 --- a/scripts/tofu_unlearn.sh +++ b/scripts/tofu_unlearn.sh @@ -9,6 +9,12 @@ models=( "Llama-3.2-3B-Instruct" "Llama-3.1-8B-Instruct" ) +trainers_experiments=( + "GradAscent unlearn/tofu/default.yaml" + "GradDiff unlearn/tofu/default.yaml" + "NPO unlearn/tofu/default.yaml" + "DPO unlearn/tofu/default.yaml" +) forget_retain_splits=( "forget01 retain99" "forget05 retain95" @@ -23,57 +29,7 @@ gradient_accumulation_steps=4 ########################################### Unlearn TOFU models ######################################################## ######################################################################################################################## -trainers_experiments=( - "GradAscent unlearn/tofu/default.yaml" - "GradDiff unlearn/tofu/default.yaml" - "NPO unlearn/tofu/default.yaml" -) -for split in "${forget_retain_splits[@]}"; do - forget_split=$(echo $split | cut -d' ' -f1) - retain_split=$(echo $split | cut -d' ' -f2) - for model in "${models[@]}"; do - for trainer_experiment in "${trainers_experiments[@]}"; do - trainer=$(echo $trainer_experiment | cut -d' ' -f1) - experiment=$(echo $trainer_experiment | cut -d' ' -f2) - - task_name=tofu_${model}_${forget_split}_${trainer} - model_path=open-unlearning/tofu_${model}_full - echo ${task_name}: Unlearning ${model_path} using ${trainer} - - # Unlearn - CUDA_VISIBLE_DEVICES=0,1 accelerate launch --config_file configs/accelerate/default_config.yaml --main_process_port $MASTER_PORT \ - src/train.py --config-name=unlearn.yaml \ - experiment=${experiment} \ - trainer=${trainer} \ - task_name=${task_name} \ - model=${model} \ - forget_split=${forget_split} \ - retain_split=${retain_split} \ - model.model_args.pretrained_model_name_or_path=${model_path} \ - 
retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json \ - trainer.args.per_device_train_batch_size=$per_device_train_batch_size \ - trainer.args.gradient_accumulation_steps=$gradient_accumulation_steps \ - trainer.args.ddp_find_unused_parameters=true \ - trainer.args.gradient_checkpointing=true - - # Eval - CUDA_VISIBLE_DEVICES=0 python src/eval.py \ - experiment=eval/tofu/default.yaml \ - forget_split=${forget_split} \ - model=${model} \ - task_name=${task_name} \ - model.model_args.pretrained_model_name_or_path=saves/unlearn/${task_name} \ - paths.output_dir=saves/unlearn/${task_name}/evals \ - retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json - done - done -done - - -trainers_experiments=( - "DPO unlearn/tofu/idk.yaml" -) for split in "${forget_retain_splits[@]}"; do forget_split=$(echo $split | cut -d' ' -f1) retain_split=$(echo $split | cut -d' ' -f2) diff --git a/setup_data.py b/setup_data.py index 358779c..48de0ad 100644 --- a/setup_data.py +++ b/setup_data.py @@ -1,17 +1,8 @@ from huggingface_hub import snapshot_download -# Setup retain model metrics snapshot_download( repo_id="open-unlearning/eval", allow_patterns="*.json", repo_type="dataset", local_dir="saves/eval", ) - -# Setup data -snapshot_download( - repo_id="open-unlearning/idk", - allow_patterns="*.jsonl", - repo_type="dataset", - local_dir="data", -) \ No newline at end of file From f468efb9eaa0c737e6e8b4e64abb411131ff7a99 Mon Sep 17 00:00:00 2001 From: Dornavineeth Date: Sun, 2 Mar 2025 19:22:21 +0000 Subject: [PATCH 5/9] download idk data --- setup_data.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/setup_data.py b/setup_data.py index 48de0ad..358779c 100644 --- a/setup_data.py +++ b/setup_data.py @@ -1,8 +1,17 @@ from huggingface_hub import snapshot_download +# Setup retain model metrics snapshot_download( repo_id="open-unlearning/eval", allow_patterns="*.json", repo_type="dataset", local_dir="saves/eval", ) + +# Setup data 
+snapshot_download( + repo_id="open-unlearning/idk", + allow_patterns="*.jsonl", + repo_type="dataset", + local_dir="data", +) \ No newline at end of file From ca8d5038b07ca8b8bbf0a71bf8a9a5502899f154 Mon Sep 17 00:00:00 2001 From: Dornavineeth Date: Sun, 2 Mar 2025 19:22:45 +0000 Subject: [PATCH 6/9] fix dpo experiment config --- scripts/tofu_unlearn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tofu_unlearn.sh b/scripts/tofu_unlearn.sh index a556bd1..1794c9b 100644 --- a/scripts/tofu_unlearn.sh +++ b/scripts/tofu_unlearn.sh @@ -13,7 +13,7 @@ trainers_experiments=( "GradAscent unlearn/tofu/default.yaml" "GradDiff unlearn/tofu/default.yaml" "NPO unlearn/tofu/default.yaml" - "DPO unlearn/tofu/default.yaml" + "DPO unlearn/tofu/idk.yaml" ) forget_retain_splits=( "forget01 retain99" From 8b073d66a7a08b4923eeffa6cbb485ffcc3246eb Mon Sep 17 00:00:00 2001 From: Vineeth <48151992+Dornavineeth@users.noreply.github.com> Date: Sun, 9 Mar 2025 15:32:43 -0400 Subject: [PATCH 7/9] RMU (#6) * IdkDPO script fix in tofu_unlearn.sh (#65) * Fix hyperlinks in README * Download I don't know data in setup_data.py * Fix tofu_unlearn.sh for IdkDPO --------- Co-authored-by: Anmol Mekala <49127549+molereddy@users.noreply.github.com> * overwrite=True * RMU added * Fix ref model device * ruff fix * RMU updated * Update rmu.py * Update README.md: add RMU * Added references and renamed functions --------- Co-authored-by: Anmol Mekala <49127549+molereddy@users.noreply.github.com> --- README.md | 4 +- configs/experiment/unlearn/muse/default.yaml | 1 + .../experiment/unlearn/muse/scalability.yaml | 1 + .../unlearn/muse/sustainabilty.yaml | 1 + configs/experiment/unlearn/tofu/default.yaml | 1 + configs/experiment/unlearn/tofu/idk.yaml | 1 + configs/trainer/RMU.yaml | 14 ++ docs/results.md | 37 ++++- scripts/tofu_unlearn.sh | 1 + src/trainer/__init__.py | 2 + src/trainer/unlearn/grad_diff.py | 2 +- src/trainer/unlearn/rmu.py | 142 ++++++++++++++++++ 12 files changed, 
203 insertions(+), 4 deletions(-) create mode 100644 configs/trainer/RMU.yaml create mode 100644 src/trainer/unlearn/rmu.py diff --git a/README.md b/README.md index a04d486..4c2cf8a 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ ## πŸ“– Overview -We provide efficient and streamlined implementations of the TOFU, MUSE unlearning benchmarks while supporting 5 unlearning methods, 3+ datasets, 6+ evaluation metrics, and 7+ LLMs. Each of these can be easily extended to incorporate more variants. +We provide efficient and streamlined implementations of the TOFU, MUSE unlearning benchmarks while supporting 6 unlearning methods, 3+ datasets, 6+ evaluation metrics, and 7+ LLMs. Each of these can be easily extended to incorporate more variants. We invite the LLM unlearning community to collaborate by adding new benchmarks, unlearning methods, datasets and evaluation metrics here to expand OpenUnlearning's features, gain feedback from wider usage and drive progress in the field. @@ -35,7 +35,7 @@ We provide several variants for each of the components in the unlearning pipelin | **Component** | **Available Options** | |------------------------|----------------------| | **Benchmarks** | [TOFU](https://arxiv.org/abs/2401.06121), [MUSE](https://muse-bench.github.io/) | -| **Unlearning Methods** | GradAscent, GradDiff, NPO, SimNPO, DPO | +| **Unlearning Methods** | GradAscent, GradDiff, NPO, SimNPO, DPO, RMU | | **Evaluation Metrics** | Verbatim Probability, Verbatim ROUGE, QA-ROUGE, MIA Attacks, TruthRatio, Model Utility | | **Datasets** | MUSE-News (BBC), MUSE-Books (Harry Potter), TOFU (different splits) | | **Model Families** | TOFU: LLaMA-3.2, LLaMA-3.1, LLaMA-2; MUSE: LLaMA-2, ICLM; Additional: Phi-3.5, Phi-1.5, Gemma | diff --git a/configs/experiment/unlearn/muse/default.yaml b/configs/experiment/unlearn/muse/default.yaml index 454a84e..b4bdbe0 100644 --- a/configs/experiment/unlearn/muse/default.yaml +++ b/configs/experiment/unlearn/muse/default.yaml @@ -34,6 
+34,7 @@ eval: muse: data_split: ${data_split} retain_logs_path: ${retain_logs_path} + overwrite: true trainer: args: diff --git a/configs/experiment/unlearn/muse/scalability.yaml b/configs/experiment/unlearn/muse/scalability.yaml index 11d90f5..b19e0cb 100644 --- a/configs/experiment/unlearn/muse/scalability.yaml +++ b/configs/experiment/unlearn/muse/scalability.yaml @@ -34,6 +34,7 @@ eval: muse: data_split: ${data_split} retain_logs_path: ${retain_logs_path} + overwrite: true trainer: args: diff --git a/configs/experiment/unlearn/muse/sustainabilty.yaml b/configs/experiment/unlearn/muse/sustainabilty.yaml index e5d7968..9a0a03e 100644 --- a/configs/experiment/unlearn/muse/sustainabilty.yaml +++ b/configs/experiment/unlearn/muse/sustainabilty.yaml @@ -34,6 +34,7 @@ eval: muse: data_split: ${data_split} retain_logs_path: ${retain_logs_path} + overwrite: true trainer: args: diff --git a/configs/experiment/unlearn/tofu/default.yaml b/configs/experiment/unlearn/tofu/default.yaml index 5f7c475..f2e0ab1 100644 --- a/configs/experiment/unlearn/tofu/default.yaml +++ b/configs/experiment/unlearn/tofu/default.yaml @@ -20,6 +20,7 @@ eval: tofu: forget_split: ${forget_split} retain_logs_path: ${retain_logs_path} + overwrite: true data: anchor: forget diff --git a/configs/experiment/unlearn/tofu/idk.yaml b/configs/experiment/unlearn/tofu/idk.yaml index 61a365d..5fcb85d 100644 --- a/configs/experiment/unlearn/tofu/idk.yaml +++ b/configs/experiment/unlearn/tofu/idk.yaml @@ -20,6 +20,7 @@ eval: tofu: forget_split: ${forget_split} retain_logs_path: ${retain_logs_path} + overwrite: true data: anchor: forget diff --git a/configs/trainer/RMU.yaml b/configs/trainer/RMU.yaml new file mode 100644 index 0000000..7e1f902 --- /dev/null +++ b/configs/trainer/RMU.yaml @@ -0,0 +1,14 @@ +defaults: + - GradDiff + +handler: RMU +method_args: + # The params here are more dependent on model and dataset. 
Tune them carefully to work + gamma: 1.0 + steering_coeff: 2 + retain_loss_type: EMBED_DIFF + alpha: 1 + module_regex: model\.layers\.7 + trainable_params_regex: + - .* # update all parameters (as done in https://github.com/tmlr-group/G-effect/blob/ef368eea3b2c6dba1e090b9ebb021ac9f047e0ae/dataloader.py#L271) + # - model\.layers\.(5|6|7)\.mlp\.down_proj\.weight # If you want to update only these weights (as done in https://github.com/centerforaisafety/wmdp/blob/bc5e1ba0367ea826caeeeaa50656336a1e87acfb/rmu/unlearn.py#L26) \ No newline at end of file diff --git a/docs/results.md b/docs/results.md index 3f7cc7c..3af7cb6 100644 --- a/docs/results.md +++ b/docs/results.md @@ -23,7 +23,7 @@ For all the experiments below, we used the following setup | **Hyperparameters** | Learning Rate (lr) = 1e-5
 α = 1, γ = 1, β = 0.1 (where applicable)
Number of Epochs = 10
Optimizer: [paged_adamw_32bit](https://huggingface.co/docs/bitsandbytes/main/en/reference/optim/adamw#bitsandbytes.optim.PagedAdamW) | __Note:__ -1. Results may vary even with the same effective hyperparameters when trained with modifications to the distributed training setup, including when training on a single GPU. For example: methods such as SimNPO, can be significantly improved with careful tuning. **Please use these numbers only for reproducibility purposes**. +1. Results may vary even with the same effective hyperparameters when trained with modifications to the distributed training setup, including when training on a single GPU. For example: methods such as SimNPO & RMU can be significantly improved with careful tuning. **Please use these numbers only for reproducibility purposes**. 2. NPO in MUSE: for NPO, the MUSE implementation is inconsistent with the [original paper](https://github.com/licong-lin/negative-preference-optimization) as discussed [here]( https://github.com/jaechan-repo/muse_bench/issues/2). This inconsistency is carried over into implementations like [SimNPO](https://github.com/OPTML-Group/Unlearn-Simple/issues/5). Here, we use the original NPO implementation with the same loss function expression across datasets. 
@@ -140,6 +140,18 @@ __Note:__ 0.6 3.17e-04 + + RMU + 0.4 + 0.62 + 0.64 + 9.59e-10 + 0.02 + 0.81 + 6.92e-21 + 0.03 + 0.81 + @@ -257,6 +269,18 @@ __Note:__ 0.54 1.07e-05 + + RMU + 0.16 + 0.55 + 0.70 + 4.87e-10 + 0.58 + 0.77 + 3.15e-15 + 0.59 + 0.76 + @@ -354,6 +378,17 @@ __Note:__ -54.26 0.54 + + RMU + 0.48 + 0.05 + 56.36 + 0.51 + 0.29 + 0.79 + -60.52 + 0.48 + \ No newline at end of file diff --git a/scripts/tofu_unlearn.sh b/scripts/tofu_unlearn.sh index 1794c9b..ae33189 100644 --- a/scripts/tofu_unlearn.sh +++ b/scripts/tofu_unlearn.sh @@ -14,6 +14,7 @@ trainers_experiments=( "GradDiff unlearn/tofu/default.yaml" "NPO unlearn/tofu/default.yaml" "DPO unlearn/tofu/idk.yaml" + "RMU unlearn/tofu/default.yaml" ) forget_retain_splits=( "forget01 retain99" diff --git a/src/trainer/__init__.py b/src/trainer/__init__.py index 1c769bf..7e195fa 100644 --- a/src/trainer/__init__.py +++ b/src/trainer/__init__.py @@ -9,6 +9,7 @@ from trainer.unlearn.npo import NPO from trainer.unlearn.dpo import DPO from trainer.unlearn.simnpo import SimNPO +from trainer.unlearn.rmu import RMU TRAINER_REGISTRY: Dict[str, Any] = {} @@ -79,3 +80,4 @@ def load_trainer( _register_trainer(NPO) _register_trainer(DPO) _register_trainer(SimNPO) +_register_trainer(RMU) diff --git a/src/trainer/unlearn/grad_diff.py b/src/trainer/unlearn/grad_diff.py index e11c7a7..bfecc19 100644 --- a/src/trainer/unlearn/grad_diff.py +++ b/src/trainer/unlearn/grad_diff.py @@ -14,7 +14,7 @@ def __init__(self, gamma=1.0, alpha=1.0, retain_loss_type="NLL", *args, **kwargs self.ref_model = self._prepare_ref_model(self.model) def _prepare_ref_model(self, model): - ref_model = copy.deepcopy(model).to("cuda") + ref_model = copy.deepcopy(model).to(self.accelerator.device) ref_model.eval() if self.is_deepspeed_enabled: ref_model = self._prepare_deepspeed(ref_model) diff --git a/src/trainer/unlearn/rmu.py b/src/trainer/unlearn/rmu.py new file mode 100644 index 0000000..391bd6a --- /dev/null +++ b/src/trainer/unlearn/rmu.py @@ -0,0 
+1,142 @@ +"""Borrowed implementation from https://github.com/centerforaisafety/wmdp/blob/main/rmu/unlearn.py""" + +import re +import torch +import deepspeed +from trainer.unlearn.grad_diff import GradDiff + + +class RMU(GradDiff): + def __init__(self, + module_regex="model\.layers\.7", + trainable_params_regex=["model\.layers\.(5|6|7)\.mlp\.down_proj\.weight"], + steering_coeff=20, + *args, **kwargs): + """ + RMU Trainer that fine-tunes only specific layers and parameters using regex-based filtering. + + Args: + module_path (str): Regex pattern to match module names. + trainable_param_paths (list of str): List of regex patterns for trainable parameters. + """ + super().__init__(*args, **kwargs) + + # Create reference model if not already set + if self.ref_model is None: + self.ref_model = self._prepare_ref_model(self.model) + + # Unfreeze only the selected parameters + self.trainable_params_regex = trainable_params_regex # Regex for selecting params + + # Get actual module references + self.module_regex = module_regex # Regex for selecting modules + self.model_module = self._get_matching_module(self.model, self.module_regex) + self.ref_module = self._get_matching_module(self.ref_model, self.module_regex) + self.steering_coeff = steering_coeff + self.control_vec = None + + + def create_optimizer(self): + self._freeze_all_params(self.model, False) + # This makes the optimizer to select only trainable params + self._set_trainable_params(self.model, self.trainable_params_regex, True) + super().create_optimizer() + self._freeze_all_params(self.model, True) + + + def _get_matching_module(self, model, module_regex): + """Returns a single module matching the given regex from a DeepSpeed/DDP-wrapped model.""" + # Handle DeepSpeed and DDP-wrapped models by accessing the underlying module + if isinstance(model, deepspeed.DeepSpeedEngine): + model = model.module # Extract the actual PyTorch model inside + + matched_modules = {name: module for name, module in 
model.named_modules() if re.fullmatch(module_regex, name)} + + if len(matched_modules) > 1: + raise ValueError(f"More than one module matched with {module_regex}: {list(matched_modules.keys())}") + elif not matched_modules: + raise ValueError(f"No module matched with {module_regex}") + + return next(iter(matched_modules.values())) # Return the single matched module + + def _freeze_all_params(self, model, requires_grad=True): + """Freeze all parameters in the model initially.""" + for param in model.parameters(): + param.requires_grad = requires_grad + + def _set_trainable_params(self, model, trainable_params_regex, requires_grad=True): + """Unfreeze specific parameters that match the regex patterns.""" + for name, param in model.named_parameters(): + if any(re.fullmatch(pattern, name) for pattern in trainable_params_regex): + param.requires_grad = requires_grad + # print(f"{name}:requires_grad\t{requires_grad}") + + def forward_with_cache(self, model, inputs, module, no_grad=True): + """Performs a forward pass while caching the output of a specified module.""" + cache = [] + def hook(module, input, output): + if isinstance(output, tuple): + cache.append(output[0]) + else: + cache.append(output) + return None + + hook_handle = module.register_forward_hook(hook) + with torch.set_grad_enabled(not(no_grad)): + outputs = model(**inputs) + hook_handle.remove() + return cache[0], outputs + + def get_control_vector(self, dim): + if self.control_vec is None: + random_vector = torch.rand(1,1, dim) + self.control_vec = random_vector / torch.norm(random_vector) * self.steering_coeff + return self.control_vec + + + def compute_activation_loss(self, activation1, activation2, mask): + squared_diff = torch.nn.functional.mse_loss(activation1, activation2, reduction="none") # Shape (b, s, d) + expanded_mask = mask.unsqueeze(-1).expand_as(squared_diff) # Shape: [b, s, d] + squared_diff_sum = (squared_diff * expanded_mask).mean(dim=2).sum(dim=(1)) # Shape: [b, 1] + num_tokens = 
mask.sum(dim=-1, keepdim=True) # Sum over seq_len, Shape: [b, 1] + return (squared_diff_sum / num_tokens).mean() + + def compute_retain_loss(self, model, retain_inputs): + retain_loss = 0.0 + + if self.retain_loss_type == "EMBED_DIFF": + model_retain_activations, _ = self.forward_with_cache(model, retain_inputs, module=self.model_module, no_grad=False) + ref_retain_activations, _ = self.forward_with_cache(self.ref_model, retain_inputs, module=self.ref_module, no_grad=True) + mask = (retain_inputs['labels'] != -100) # Shape: [b, s] + retain_loss = self.compute_activation_loss(model_retain_activations, ref_retain_activations.to(model_retain_activations.device), mask) + else: + retain_loss = super().compute_retain_loss(model, retain_inputs) + return retain_loss + + def compute_loss(self, model, inputs, return_outputs=False): + forget_inputs = inputs["forget"] + forget_inputs = { + "input_ids": forget_inputs["input_ids"], + "attention_mask": forget_inputs["attention_mask"], + "labels": forget_inputs["labels"], + } + + model_forget_activations, forget_outputs = self.forward_with_cache(model, forget_inputs, self.model_module, no_grad=False) + # If multiple datasets or concepts need unlearning, pass the control vector during processing; otherwise, default to a random vector during training. 
+ control_vec = forget_inputs.get("control_vec", self.get_control_vector(model_forget_activations.shape[-1])) + control_vec = control_vec.to(dtype=model_forget_activations.dtype, device=model_forget_activations.device) + control_vec = control_vec.expand_as(model_forget_activations) + mask = (forget_inputs['labels'] != -100) # Shape: [b, s] + forget_loss = self.compute_activation_loss(model_forget_activations, control_vec, mask) + + retain_inputs = inputs["retain"] + retain_inputs = { + "input_ids": retain_inputs["input_ids"], + "attention_mask": retain_inputs["attention_mask"], + "labels": retain_inputs["labels"], + } + retain_loss = self.compute_retain_loss(model=model, retain_inputs=retain_inputs) + + loss = self.gamma * forget_loss + self.alpha * retain_loss + + return (loss, forget_outputs) if return_outputs else loss From dccb831590114c7714f540f19c4d1b9c60b26fea Mon Sep 17 00:00:00 2001 From: Vineeth <48151992+Dornavineeth@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:12:33 -0400 Subject: [PATCH 8/9] Add structure to contributions, setup leaderboard, update documentation (#8) * docs: updates, small corrections, re-formats * modified ruff commands * modified ruff commands * CI/CD minor updates * added contributing + leaderboard * fix minor spelling misatkes * docs: bunch of minor updates * docs fixes --------- Co-authored-by: molereddy --- .github/ISSUE_TEMPLATE/feature-request.yaml | 9 + .github/PULL_REQUEST_TEMPLATE.md | 5 +- .github/workflows/tests.yml | 7 +- .pre-commit-config.yaml | 6 +- Makefile | 6 +- README.md | 50 +++-- community/benchmarks/template/README.md | 51 +++++ community/benchmarks/template/run.sh | 18 ++ community/leaderboard.md | 168 ++++++++++++++++ community/methods/template/README.md | 39 ++++ community/methods/template/run.sh | 13 ++ docs/components.md | 4 +- docs/contributing.md | 207 ++++++++++++++++++++ docs/experiments.md | 4 +- docs/{results.md => repro.md} | 2 + setup.py | 9 +- setup_data.py | 2 +- src/trainer/unlearn/rmu.py 
| 105 ++++++---- 18 files changed, 620 insertions(+), 85 deletions(-) create mode 100644 community/benchmarks/template/README.md create mode 100644 community/benchmarks/template/run.sh create mode 100644 community/leaderboard.md create mode 100644 community/methods/template/README.md create mode 100644 community/methods/template/run.sh create mode 100644 docs/contributing.md rename docs/{results.md => repro.md} (98%) diff --git a/.github/ISSUE_TEMPLATE/feature-request.yaml b/.github/ISSUE_TEMPLATE/feature-request.yaml index 22ec671..e43e913 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.yaml +++ b/.github/ISSUE_TEMPLATE/feature-request.yaml @@ -32,3 +32,12 @@ body: label: Motivation description: | Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. + + - type: textarea + id: implementation + validations: + required: false + attributes: + label: Implementation + description: | + Please describe your proposed solution in detail. Outline the implementation approach, including any key technical considerations. If there are challenges or blockers preventing implementation, specify them along with potential workarounds or dependencies. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index fc69076..fbc9a07 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,6 +6,5 @@ Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). -- [ ] Have you gone through the documentation of adding new [components](../docs/components.md)? -- [ ] Did you make sure to update the documentation with your changes? Here are the pointers to documentation - [documentation guidelines](../README.md#-further-documentation). 
\ No newline at end of file +- [ ] Have you gone through the contributions [guide](../docs/contributing.md)? +- [ ] Are your changes documented? Read documentation guidelines [here](../README.md#-further-documentation). \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4277042..f1b0afa 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,16 +46,11 @@ jobs: run: | python -m pip install --upgrade pip pip install ruff==0.6.6 - # python -m pip install git+https://github.com/huggingface/transformers.git - # python -m pip install ".[torch,dev]" - name: Check Quality run: make quality - # - name: Check Style - # run: make style - # - name: Test with pytest # run: | - # cd LLaMA-Factory + # cd # make test diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 97a7341..97b7cdf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,6 @@ repos: rev: v0.6.9 hooks: - id: ruff - args: [--fix, scripts, src] - - id: ruff-format - args: [scripts, src] \ No newline at end of file + args: [check, --fix, scripts, src, setup.py, setup_data.py] + - id: ruff + args: [format, scripts, src, setup.py setup_data.py] \ No newline at end of file diff --git a/Makefile b/Makefile index 179c2d5..11bdbb1 100644 --- a/Makefile +++ b/Makefile @@ -3,10 +3,12 @@ check_dirs := scripts src #setup.py quality: - ruff check $(check_dirs) + ruff check $(check_dirs) setup.py setup_data.py + ruff format --check $(check_dirs) setup.py setup_data.py style: - ruff --format $(check_dirs) + ruff check $(check_dirs) setup.py setup_data.py --fix + ruff format $(check_dirs) setup.py setup_data.py test: CUDA_VISIBLE_DEVICES= pytest tests/ diff --git a/README.md b/README.md index 403754b..986b874 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,17 @@ ## πŸ“– Overview -We provide efficient and streamlined implementations of the TOFU, MUSE unlearning benchmarks while supporting 6 unlearning 
methods, 3+ datasets, 6+ evaluation metrics, and 7+ LLMs. Each of these can be easily extended to incorporate more variants. +We provide efficient and streamlined implementations of the TOFU, MUSE unlearning benchmarks while supporting 6 unlearning methods, 3+ datasets, 6+ evaluation metrics, and 6+ LLM architectures. Each of these can be easily extended to incorporate more variants. We invite the LLM unlearning community to collaborate by adding new benchmarks, unlearning methods, datasets and evaluation metrics here to expand OpenUnlearning's features, gain feedback from wider usage and drive progress in the field. -> ⚠️ **Notice (Updated: February 27, 2025)** -> This repository replaces the original TOFU codebase, which can be found at [`github.com/locuslab/tofu`](https://github.com/locuslab/tofu) and isn't maintained anymore. +### πŸ“’ Updates + +#### [Mar 27, 2025] +- **Easier contributions, leaderboard and reproducibility**: We've updated the documentation to make contributing new unlearning methods and benchmarks much easier. Users can document additions better and also update a leaderboard with their results. See [this section](#-how-to-contribute) for details. + +#### [Feb 27, 2025] +⚠️ **Repository Update**: This repo replaces the original TOFU codebase at [`github.com/locuslab/tofu`](https://github.com/locuslab/tofu), which is no longer maintained. 
## πŸ—ƒοΈ Available Components @@ -38,22 +43,21 @@ We provide several variants for each of the components in the unlearning pipelin | **Unlearning Methods** | GradAscent, GradDiff, NPO, SimNPO, DPO, RMU | | **Evaluation Metrics** | Verbatim Probability, Verbatim ROUGE, QA-ROUGE, MIA Attacks, TruthRatio, Model Utility | | **Datasets** | MUSE-News (BBC), MUSE-Books (Harry Potter), TOFU (different splits) | -| **Model Families** | TOFU: LLaMA-3.2, LLaMA-3.1, LLaMA-2; MUSE: LLaMA-2, ICLM; Additional: Phi-3.5, Phi-1.5, Gemma | +| **Model Families** | TOFU: LLaMA-3.2, LLaMA-3.1, LLaMA-2; MUSE: LLaMA-2; Additional: Phi-3.5, Phi-1.5, Gemma | --- ## πŸ“Œ Table of Contents - πŸ“– [Overview](#-overview) +- πŸ“’ [Updates](#-updates) - πŸ—ƒοΈ [Available Components](#%EF%B8%8F-available-components) - ⚑ [Quickstart](#-quickstart) - - πŸ› οΈ [Environment Setup](#-environment-setup) - - πŸ’Ύ [Data Setup](#-data-setup) - πŸ”„ [Updated TOFU benchmark](#-updated-tofu-benchmark) - πŸ§ͺ [Running Experiments](#-running-experiments) - πŸš€ [Perform Unlearning](#-perform-unlearning) - πŸ“Š [Perform an Evaluation](#-perform-an-evaluation) - πŸ“œ [Running Baseline Experiments](#-running-baseline-experiments) -- βž• [How to Add New Components](#-how-to-add-new-components) +- βž• [How to Contribute](#-how-to-contribute) - πŸ“š [Further Documentation](#-further-documentation) - πŸ”— [Support & Contributors](#-support--contributors) - πŸ“ [Citing this work](#-citing-this-work) @@ -64,20 +68,16 @@ We provide several variants for each of the components in the unlearning pipelin ## ⚑ Quickstart -### πŸ› οΈ Environment Setup - ```bash +# environment setup conda create -n unlearning python=3.11 conda activate unlearning pip install . pip install --no-build-isolation flash-attn==2.6.3 -``` - -### πŸ’Ύ Data Setup -Download the log files containing metric results from the models used in the supported benchmarks (including the retain model logs used to compare the unlearned models against). 
-```bash -python setup_data.py # populates saves/eval with evaluation results of the uploaded models +# data setup +python setup_data.py # saves/eval now contains evaluation results of the uploaded models +# Downloads log files with metric eval results (incl retain model logs) from the models used in the supported benchmarks. ``` --- @@ -103,7 +103,7 @@ python src/train.py --config-name=unlearn.yaml experiment=unlearn/tofu/default \ forget_split=forget10 retain_split=retain90 trainer=GradAscent task_name=SAMPLE_UNLEARN ``` -- `experiment`- Path to the Hydra config file [`configs/experiment/unlearn/muse/default.yaml`](configs/experiment/unlearn/tofu/default.yaml) with default experimental settings for TOFU unlearning, e.g. train dataset, eval benchmark details, model paths etc.. +- `experiment`- Path to the Hydra config file [`configs/experiment/unlearn/tofu/default.yaml`](configs/experiment/unlearn/tofu/default.yaml) with default experimental settings for TOFU unlearning, e.g. train dataset, eval benchmark details, model paths etc.. - `forget_split/retain_split`- Sets the forget and retain dataset splits. - `trainer`- Load [`configs/trainer/GradAscent.yaml`](configs/trainer/GradAscent.yaml) and override the unlearning method with the handler (see config) implemented in [`src/trainer/unlearn/grad_ascent.py`](src/trainer/unlearn/grad_ascent.py). @@ -126,24 +126,21 @@ For more details about creating and running evaluations, refer [`docs/evaluation ### πŸ“œ Running Baseline Experiments -The scripts below execute standard baseline unlearning experiments on the TOFU and MUSE datasets, evaluated using their corresponding benchmarks. The expected results for these are in [`docs/results.md`](docs/results.md). +The scripts below execute standard baseline unlearning experiments on the TOFU and MUSE datasets, evaluated using their corresponding benchmarks. The expected results for these are in [`docs/repro.md`](docs/repro.md). 
```bash bash scripts/tofu_unlearn.sh bash scripts/muse_unlearn.sh ``` ---- +The above scripts are not tuned and uses default hyper parameter settings. We encourage you to tune your methods and add your final results in [`community/leaderboard.md`](community/leaderboard.md). -## βž• How to Add New Components +--- -Adding a new component (trainer, evaluation metric, benchmark, model, or dataset) requires defining a new class, registering it, and creating a configuration file. Learn more about adding new components in [`docs/components.md`](docs/components.md). +## βž• How to Contribute -Please feel free to raise a pull request for any new features after setting up the environment in development mode. +If you are interested in contributing to our work, please have a look at [`contributing.md`](docs/contributing.md) guide. -```bash -pip install .[dev] -``` ## πŸ“š Further Documentation @@ -151,11 +148,12 @@ For more in-depth information on specific aspects of the framework, refer to the | **Documentation** | **Contains** | |------------------------------------------------|--------------------------------------------------------------------------------------------------------------------| -| [`docs/components.md`](docs/components.md) | Instructions on how to add new components such as trainers, benchmarks, metrics, models, datasets, etc. | +| [`docs/contributing.md`](docs/contributing.md) | Instructions on how to add new methods, benchmarks, components such as trainers, benchmarks, metrics, models, datasets, etc. | | [`docs/evaluation.md`](docs/evaluation.md) | Detailed instructions on creating and running evaluation metrics and benchmarks. | | [`docs/experiments.md`](docs/experiments.md) | Guide on running experiments in various configurations and settings, including distributed training, fine-tuning, and overriding arguments. | | [`docs/hydra.md`](docs/hydra.md) | Explanation of the Hydra features used in configuration management for experiments. 
| -| [`docs/results.md`](docs/results.md) | Reference results from various unlearning methods run using this framework on TOFU and MUSE benchmarks. | +| [`community/leaderboard.md`](community/leaderboard.md) | Reference results from various unlearning methods run using this framework on TOFU and MUSE benchmarks. | +| [`docs/repro.md`](docs/repro.md) (deprecated) | Results are provided solely for reproducibility purposes, without any parameter tuning. | --- ## πŸ”— Support & Contributors diff --git a/community/benchmarks/template/README.md b/community/benchmarks/template/README.md new file mode 100644 index 0000000..855952f --- /dev/null +++ b/community/benchmarks/template/README.md @@ -0,0 +1,51 @@ +# TITLE + +- Paper title, authors, links. + +Provide a concise summary of your benchmark details and its contributions. Please avoid using images to keep the repository size manageable. + +# Datasets + +Use a clear and consistent naming convention for dataset splits. + +- [ ] Provide a link to find/download the datasets (preferably HuggingFace). + +# Models + + +- [ ] Upload any unlearning target or reference retain models for unlearning preferably on HuggingFace and provide the path. +- [ ] Model creation details and how they fit in benchmark. + +# Baselines & Results + +Discuss the baselines used and their results. + + +## Setup +Please include the experimental setup for the baselines + +- [ ] **Hyperparameters & Search Space:** Specify key hyperparameters, their search ranges, number of trials etc. +- [ ] **Computational Setup:** Mention the type and number of GPUs used. +- [ ] **DeepSpeed Configuration:** If any modifications were made to the default DeepSpeed config, specify them here. (You may include the config as a code block.) +- [ ] **Other Details:** Any additional setup details crucial for reproducing your method. + +To replicate your results, provide a `run.sh` script that contains all necessary commands to reproduce the final results. 
Ensure the script is well-documented. + + +# Citation + + +If you use this work, please cite: + +```bibtex + + + +@misc{openunlearning2025, + title={OpenUnlearning: A Unified Framework for LLM Unlearning Benchmarks}, + author={Dorna, Vineeth and Mekala, Anmol and Zhao, Wenlong and McCallum, Andrew and Kolter, J Zico and Maini, Pratyush}, + year={2025}, + howpublished={\url{https://github.com/locuslab/open-unlearning}}, + note={Accessed: February 27, 2025} +} +``` \ No newline at end of file diff --git a/community/benchmarks/template/run.sh b/community/benchmarks/template/run.sh new file mode 100644 index 0000000..a5335b2 --- /dev/null +++ b/community/benchmarks/template/run.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +######################################################################################################################## +########################################### RETAIN Finetuned ####$###################################################### +######################################################################################################################## + + + +######################################################################################################################### +############################################ FULL Finetuned models ###################################################### +######################################################################################################################### + + + + +######################################################################################################################### +############################################ Baseline methods ####$###################################################### +######################################################################################################################### diff --git a/community/leaderboard.md b/community/leaderboard.md new file mode 100644 index 0000000..8803932 --- /dev/null +++ b/community/leaderboard.md @@ -0,0 
+1,168 @@ +
+ +# Leaderboard + +
+ +We encourage the community to develop new methods, optimize them for specific benchmarks, and compare results with existing approaches. + +To implement a new method, refer to our [contributing guide](../docs/contributing.md). + +> **Note:** The [repro.md](../docs/repro.md) file is maintained for reproducibility purposes. However, we encourage contributors to update the leaderboard table instead of the reproducibility table. We will continue refining and tuning baseline methods to keep the leaderboard up to date. + + +### TOFU unlearning on the `Llama-3.2-1B-Instruct` architecture + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Methodforget01forget05forget10
forget_qualitymodel_utilityforget_qualitymodel_utilityforget_qualitymodel_utility
Finetuned0.010.602.96e-130.68.08e-220.6
Retain1.00.601.00.61.00.59
+
+ + + +### TOFU unlearning on the `Llama-3.2-1B-Instruct` architecture + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Methodforget01forget05forget10
forget_qualitymodel_utilityforget_qualitymodel_utilityforget_qualitymodel_utility
Finetuned0.010.602.96e-130.68.08e-220.6
Retain1.00.601.00.61.00.59
+
+ + +### MUSE unlearning on the benchmark's target models + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodNewsBooks
forget_knowmem_ROUGEforget_verbmem_ROUGEprivleakretain_knowmem_ROUGEforget_knowmem_ROUGEforget_verbmem_ROUGEprivleakretain_knowmem_ROUGE
Finetuned0.640.58-99.810.550.471.0-57.260.69
Retain0.330.2100.560.30.1400.69
+
diff --git a/community/methods/template/README.md b/community/methods/template/README.md new file mode 100644 index 0000000..7facb01 --- /dev/null +++ b/community/methods/template/README.md @@ -0,0 +1,39 @@ +# TITLE + +- Paper title, authors, links. + + +Provide a concise summary of your method details and its contributions. Please avoid using images to keep the repository size manageable. + +# Setup + +Please include the experimental setup such as + +- [ ] **Hyperparameters & Search Space:** Specify key hyperparameters, their search ranges, number of trials etc. +- [ ] **Computational Setup:** Mention the type and number of GPUs used. +- [ ] **DeepSpeed Configuration:** If any modifications were made to the default DeepSpeed config, specify them here. (You may include the config as a code block.) +- [ ] **Other Details:** Any additional setup details crucial for reproducing your method. + +# Results + +To replicate your results, provide a `run.sh` script that contains all necessary commands to reproduce the final results. Ensure the script is well-documented. + +It would be appreciated if you can upload the final unlearned model(s) along with their `evals` folders to HuggingFace and provide the link(s) here. As the evaluations are updated, this would help us re-evaluate your model(s). 
+ +# Citation + + +If you use this work, please cite: + +```bibtex + + + +@misc{openunlearning2025, + title={OpenUnlearning: A Unified Framework for LLM Unlearning Benchmarks}, + author={Dorna, Vineeth and Mekala, Anmol and Zhao, Wenlong and McCallum, Andrew and Kolter, J Zico and Maini, Pratyush}, + year={2025}, + howpublished={\url{https://github.com/locuslab/open-unlearning}}, + note={Accessed: February 27, 2025} +} +``` \ No newline at end of file diff --git a/community/methods/template/run.sh b/community/methods/template/run.sh new file mode 100644 index 0000000..8c165e5 --- /dev/null +++ b/community/methods/template/run.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +######################################################################################################################## +########################################### Hyper parameter tuning ##################################################### +######################################################################################################################## + +# Optional + +######################################################################################################################## +########################################### Final best parameters ##################################################### +######################################################################################################################## + +# Required to replicate your results \ No newline at end of file diff --git a/docs/components.md b/docs/components.md index e75f8df..016932c 100644 --- a/docs/components.md +++ b/docs/components.md @@ -19,7 +19,7 @@ This process involves three main steps: 6. [Collator](#collator) - Handles data collation logic 7. [Experiment](#experiment) - Combines components into a final experiment config -__Note:__ adding each component requires Hydra config management features, which are documented in [`docs/hydra.md`](../docs/hydra.md). 
+> [!Note] Adding each component requires Hydra config management features, which are documented in [`docs/hydra.md`](../docs/hydra.md). --- @@ -147,7 +147,7 @@ To add a new model architecture: ### Implement and register a handler For all the models currently supported, HuggingFace's `AutoModelForCausalLM` and `AutoTokenizer` are used, and therefore the user doesn't need to create or register any handler. -__Note__: Currently, we do not support loading models modified with LoRA and related variants. If you wish use such features, please create define and register model handlers for this logic in [`src/model`](../src/model) and provide the config info as discussed next. +> [!Note]: Currently, we do not support loading models modified with LoRA and related variants. If you wish to use such features, please define and register model handlers for this logic in [`src/model`](../src/model) and provide the config info as discussed next. ### Add to configs Model configurations contain details required to load the model+tokenizer such as paths, chat templating arguments, LoRA parameters etc. in [`configs/models`](../configs/models/). diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..3e398f5 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,207 @@ +# Contributing + +Everyone is welcome to contribute, and every contribution is valued. Aside from coding components, answering questions, assisting others, and improving documentation are all appreciated. + +You can also help by spreading the word! If you find this project useful, please share it with others, cite it, link it on your repositories and posts, or simply ⭐️ the repo to show your support. + +> 🤝 This guide is heavily borrowed from the awesome [transformers](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md) guide to contributing.
+ +## Ways to Contribute + +There are several ways you can contribute to OpenUnlearning: + +* Fix issues with the existing code. +* Submit issues related to bugs or desired new features. +* Support new components (models, datasets, collator etc). +* Implement new unlearning methods. +* Implement new evaluations. +* Contribute to the documentation. + +## Fixing Issues + +If you notice an issue with the existing code and have a fix in mind, feel free to [start contributing](#create-a-pull-request) and open a Pull Request! + +## Submitting a Bug-Related Issue or Feature Request + +Do your best to follow these guidelines when submitting a bug-related issue or a feature request. It will make it easier for us to come back to you quickly and with good feedback. + +### Did You Find a Bug? + +Before you report an issue, we would really appreciate it if you could **make sure the bug was not already reported** (use the search bar on GitHub under Issues). Please try to ensure that the bug is in OpenUnlearning itself, and not your code. + +Please include the following information in your issue so we can quickly resolve it: + +* A short, self-contained, code snippet that allows us to reproduce the bug. +* The **full** traceback if an exception is raised. +* The hardware used to run the experiment, including specifications such as the number and type of GPUs etc. +* The hydra config file corresponding to the experiment if needed (since these files are long, you may link them or use a markdown dropdown in your issue). +* Attach any other additional information, like screenshots, you think may help. + +### Do You Want a New Feature? + +If there is a new feature you'd like to see in OpenUnlearning, please open an issue and describe: + +1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it something you worked on and think it could benefit the community? + + Whatever it is, we'd love to hear about it! + +2.
Describe your requested feature in as much detail as possible. The more you can tell us about it, the better we'll be able to help you. +3. Provide a *code snippet* that demonstrates the feature's usage. +4. If the feature is related to a paper, please include a link. + +## Do You Want to Support New Components? + +Adding a new component listed below requires defining a new class, registering it, and creating a configuration file. Learn more about adding new components in [`docs/components.md`](components.md). + +1. [Trainer](components.md#trainer) - Algorithm used in LLM training or unlearning +2. [Dataset](components.md#dataset) - Dataset class for preprocessing raw data +3. [Evaluation Metric](components.md#evaluation-metric) - Metric class implementing model evaluation +4. [Benchmark](components.md#benchmark) - Suite combining multiple evaluation metrics +5. [Model](components.md#model) - LLM used in unlearning +6. [Collator](components.md#collator) - Handles data collation logic +7. [Experiment](components.md#experiment) - Combines components into a final experiment config + +> **IMPORTANT** 🚀 +> **We especially encourage** contributions of methods and benchmarks that you've created, since you best understand them and know how to use them. We are ready to expedite their integration into OpenUnlearning. +> When facing difficulties implementing any component, please contact the maintainers to join our Discord, where we can discuss the implementations in detail. + +## Contributing a New Unlearning Method + +### 1. Implement an Unlearning Trainer + +Your method might require a custom loss function, or other trainer related modifications which go here. +Refer to our [Trainer implementation guide](components.md#trainer) to ensure your method integrates well with our framework. + +### 2. Detail Commands to Be Run + +Some methods might involve multiple commands or steps while unlearning: ensure you write a clear `.sh` file that documents this. + +### 3.
Run and Tune Your Method on Relevant Benchmarks + +- Once implemented, evaluate your method on applicable benchmarks using the best possible parameters. +- Create a folder [`community/methods/`](../community/methods) and include a README file in it, explaining the method details, hyper-parameters, strategy/logic for selecting the best model for unlearning etc. +- Include a bash script `run.sh` with the exact bash command needed to replicate your results. + +### 4. Update Leaderboard and Upload Model + +Don't forget to add your results to the [leaderboard](../community/leaderboard.md) and upload your unlearned model to HuggingFace for broader accessibility and reproducibility. + +```bash +pip install huggingface_hub +huggingface-cli login + +huggingface-cli repo create {benchmark}-{model}-{datasplit}-{method} +cd <path_to_unlearned_model> + +git init +git remote add origin https://huggingface.co/<username>/{benchmark}-{model}-{datasplit}-{method} +git add . +git commit -m "Initial commit" +git push origin main +``` + +--- + +## Contributing to Unlearning Benchmark Evaluations + +Evaluating LLM unlearning is essential for assessing the effectiveness of different unlearning methods. While various benchmarks and metrics exist, identifying the most suitable ones for capturing the nuances of unlearning remains an open challenge. + +Your contributions toward defining or improving evaluation methods can significantly advance unlearning research. By proposing reliable benchmarks, you help ensure that unlearning methods are both effective and aligned with real-world requirements. + +- To add a new unlearning evaluation metric, refer to our [Metric Implementation Guide](components.md#evaluation-metric). +- To integrate new datasets and models, follow our [Components Guide](components.md). + +### Steps to add a new Unlearning Benchmark + +1. **Prepare Datasets & Models** – Create your dataset and train models to generate fine-tuned or retained models. +2.
**Define a New Benchmark** (if needed) – Follow the [Benchmark Guide](components.md#benchmark) to implement a new evaluation benchmark. +3. **Run and Tune Baseline Methods** – Evaluate existing unlearning methods on your benchmark and optimize them. +4. **Document & Share Findings** – Provide detailed steps for reproduction in [`community/benchmarks/`](../community/benchmarks). + +--- + +## Do You Want to Add Documentation? + +We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be happy to make the changes or help you make a contribution! + +--- + +## Create a Pull Request + +Before writing any code, we strongly advise you to search through the existing PRs or issues to make sure nobody is already working on the same thing. If you are unsure, it is always a good idea to open an issue to get some feedback. + +Follow the steps below to start contributing: + +1. Fork the [repository](https://github.com/locuslab/open-unlearning) by clicking on the **[Fork](https://github.com/locuslab/open-unlearning/fork)** button on the repository's page. This creates a copy of the code under your GitHub user account. + +2. Clone your fork to your local disk, and add the base repository as a remote: + + ```bash + git clone git@github.com:<your-github-handle>/open-unlearning.git + cd open-unlearning + git remote add upstream https://github.com/locuslab/open-unlearning.git + ``` + +3. You can work on the forked main branch or create a new branch to hold your development changes: + + ```bash + git checkout -b a-descriptive-name-for-my-changes + ``` + +4. Set up the environment in dev mode after following steps in [Quick Start](../README.md#-quickstart). This installs other packages such as `ruff`, `pre-commit` etc. + + ```bash + pip install .[dev] + ``` + +5. Develop the features in your fork/branch.
+ + As you work on your code, you should make sure the code is linted and formatted correctly. + + OpenUnlearning relies on `ruff` to lint & format its source code consistently. After you make changes, to check the quality of code, run + + ```bash + make quality + ``` + + If you prefer to apply the style corrections: + + ```bash + make style + ``` + + Once you're happy with your changes, add the changed files with `git add` and record your changes locally with `git commit`: + + ```bash + git add modified_file.py + git commit + ``` + + Please remember to write [good commit messages](https://chris.beams.io/posts/git-commit/) to clearly communicate the changes you made! + + To keep your copy of the code up to date with the original repository, rebase your branch on `upstream/main` *before* you open a pull request or if requested by a maintainer: + + ```bash + git fetch upstream + git rebase upstream/main + ``` + + Push your changes to your branch: + + ```bash + git push -u origin a-descriptive-name-for-my-changes + ``` + + If you've already opened a pull request, you'll need to force push with the `--force` flag. Otherwise, if the pull request hasn't been opened yet, you can just push your changes normally. + +6. Now you can go to your fork of the repository on GitHub and click on **Pull Request** to open a pull request. Make sure you tick off all the boxes on our [checklist](#pull-request-checklist) below. When you're ready, you can send your changes to the project maintainers for review. + +7. Please bear with us maintainers with the changes we require! We want to ensure we keep the repository clean and easily extensible. As you make your updates, you may want to work in your local branch and push the changes to your fork, since everyone can see the changes in the pull request. Changes pushed to the fork will automatically appear in the pull request. + +### Pull Request Checklist + +☐ The pull request title should summarize your contribution.
+☐ If your pull request addresses an issue, please mention the issue number in the pull request description to make sure they are linked (and people viewing the issue know you are working on it). +☐ To indicate a work in progress, please prefix the title with `[WIP]`. These are useful to avoid duplicated work, and to differentiate it from PRs ready to be merged. +☐ Make sure existing tests and checks, if any, pass. +☐ Make sure methods have informative docstrings. \ No newline at end of file diff --git a/docs/experiments.md b/docs/experiments.md index 8610961..b570d3e 100644 --- a/docs/experiments.md +++ b/docs/experiments.md @@ -59,7 +59,7 @@ paths.output_dir=saves/unlearn/NPO/evals ``` -**Note:** The unlearning experiments support evaluation during the unlearning training. But this is supported only on a single GPU, evaluation can be performed during unlearning itself. When multiple GPUs are used to train, checkpoints must be stored and evaluated after training. +> [!Note]: The unlearning experiments support evaluation during the unlearning finetuning. But this is supported only on a single GPU. When multiple GPUs are used to train, checkpoints must be stored and evaluated after training. --- @@ -242,7 +242,7 @@ CUDA_VISIBLE_DEVICES=0,1 accelerate launch \ src/train.py --config-name=unlearn.yaml experiment=unlearn/muse/default.yaml task_name=DISTRIBUTED_TRAIN ``` -**Note:** Evaluation runs are designed to work only a single GPU (this includes running evaluation during training). To run an evaluation job, modify your command to make only one GPU visible (assuming one GPU is enough for inference): +> [!Note]: Evaluation runs are designed to work only on a single GPU (this includes running evaluation during training).
To run an evaluation job, modify your command to make only one GPU visible (assuming one GPU is enough for inference): ```bash CUDA_VISIBLE_DEVICES=0 python src/eval.py experiment=eval/muse/default.yaml task_name=SAMPLE_EVAL diff --git a/docs/results.md b/docs/repro.md similarity index 98% rename from docs/results.md rename to docs/repro.md index 3af7cb6..ac64ac3 100644 --- a/docs/results.md +++ b/docs/repro.md @@ -4,6 +4,8 @@ +>​For results where methods have been tuned for optimal performance, please refer to the [`community/leaderboard`](../community/leaderboard.md). + The scripts below execute standard baseline unlearning experiments on the TOFU and MUSE datasets, evaluated using their corresponding benchmarks. ```bash bash scripts/tofu_unlearn.sh diff --git a/setup.py b/setup.py index 79c6dbc..209335c 100644 --- a/setup.py +++ b/setup.py @@ -7,8 +7,8 @@ setup( name="open-unlearning", version="0.1.0", - author="Vineeth Dorna, Anmol Reddy Mekala", - author_email="vineethdornal@gmail.com, m.anmolreddy@gmail.com", + author="Vineeth Dorna, Anmol Mekala", + author_email="vineethdorna@gmail.com, m.anmolreddy@gmail.com", description="A library for machine unlearning in LLMs.", long_description=open("README.md").read(), long_description_content_type="text/markdown", @@ -16,7 +16,10 @@ packages=find_packages(), install_requires=requirements, # Uses requirements.txt extras_require={ - "dev": ["pre-commit==4.0.1"], # Install using `pip install .[dev]` + "dev": [ + "pre-commit==4.0.1", + "ruff==0.6.9", + ], # Install using `pip install .[dev]` }, python_requires=">=3.11", ) diff --git a/setup_data.py b/setup_data.py index 358779c..760679b 100644 --- a/setup_data.py +++ b/setup_data.py @@ -14,4 +14,4 @@ allow_patterns="*.jsonl", repo_type="dataset", local_dir="data", -) \ No newline at end of file +) diff --git a/src/trainer/unlearn/rmu.py b/src/trainer/unlearn/rmu.py index 391bd6a..d990d3a 100644 --- a/src/trainer/unlearn/rmu.py +++ b/src/trainer/unlearn/rmu.py @@ -7,11 
+7,14 @@ class RMU(GradDiff): - def __init__(self, - module_regex="model\.layers\.7", - trainable_params_regex=["model\.layers\.(5|6|7)\.mlp\.down_proj\.weight"], - steering_coeff=20, - *args, **kwargs): + def __init__( + self, + module_regex="model\.layers\.7", + trainable_params_regex=["model\.layers\.(5|6|7)\.mlp\.down_proj\.weight"], + steering_coeff=20, + *args, + **kwargs, + ): """ RMU Trainer that fine-tunes only specific layers and parameters using regex-based filtering. @@ -20,40 +23,46 @@ def __init__(self, trainable_param_paths (list of str): List of regex patterns for trainable parameters. """ super().__init__(*args, **kwargs) - + # Create reference model if not already set if self.ref_model is None: self.ref_model = self._prepare_ref_model(self.model) # Unfreeze only the selected parameters - self.trainable_params_regex = trainable_params_regex # Regex for selecting params - + self.trainable_params_regex = ( + trainable_params_regex # Regex for selecting params + ) + # Get actual module references self.module_regex = module_regex # Regex for selecting modules self.model_module = self._get_matching_module(self.model, self.module_regex) self.ref_module = self._get_matching_module(self.ref_model, self.module_regex) self.steering_coeff = steering_coeff self.control_vec = None - - + def create_optimizer(self): self._freeze_all_params(self.model, False) # This makes the optimizer to select only trainable params self._set_trainable_params(self.model, self.trainable_params_regex, True) super().create_optimizer() self._freeze_all_params(self.model, True) - - + def _get_matching_module(self, model, module_regex): """Returns a single module matching the given regex from a DeepSpeed/DDP-wrapped model.""" # Handle DeepSpeed and DDP-wrapped models by accessing the underlying module if isinstance(model, deepspeed.DeepSpeedEngine): model = model.module # Extract the actual PyTorch model inside - matched_modules = {name: module for name, module in model.named_modules() 
if re.fullmatch(module_regex, name)} + matched_modules = { + name: module + for name, module in model.named_modules() + if re.fullmatch(module_regex, name) + } if len(matched_modules) > 1: - raise ValueError(f"More than one module matched with {module_regex}: {list(matched_modules.keys())}") + raise ValueError( + f"More than one module matched with {module_regex}: {list(matched_modules.keys())}" + ) elif not matched_modules: raise ValueError(f"No module matched with {module_regex}") @@ -70,45 +79,59 @@ def _set_trainable_params(self, model, trainable_params_regex, requires_grad=Tru if any(re.fullmatch(pattern, name) for pattern in trainable_params_regex): param.requires_grad = requires_grad # print(f"{name}:requires_grad\t{requires_grad}") - + def forward_with_cache(self, model, inputs, module, no_grad=True): """Performs a forward pass while caching the output of a specified module.""" cache = [] + def hook(module, input, output): if isinstance(output, tuple): cache.append(output[0]) else: cache.append(output) - return None - + return None + hook_handle = module.register_forward_hook(hook) - with torch.set_grad_enabled(not(no_grad)): + with torch.set_grad_enabled(not (no_grad)): outputs = model(**inputs) hook_handle.remove() return cache[0], outputs - + def get_control_vector(self, dim): if self.control_vec is None: - random_vector = torch.rand(1,1, dim) - self.control_vec = random_vector / torch.norm(random_vector) * self.steering_coeff + random_vector = torch.rand(1, 1, dim) + self.control_vec = ( + random_vector / torch.norm(random_vector) * self.steering_coeff + ) return self.control_vec - def compute_activation_loss(self, activation1, activation2, mask): - squared_diff = torch.nn.functional.mse_loss(activation1, activation2, reduction="none") # Shape (b, s, d) + squared_diff = torch.nn.functional.mse_loss( + activation1, activation2, reduction="none" + ) # Shape (b, s, d) expanded_mask = mask.unsqueeze(-1).expand_as(squared_diff) # Shape: [b, s, d] - 
squared_diff_sum = (squared_diff * expanded_mask).mean(dim=2).sum(dim=(1)) # Shape: [b, 1] + squared_diff_sum = ( + (squared_diff * expanded_mask).mean(dim=2).sum(dim=(1)) + ) # Shape: [b, 1] num_tokens = mask.sum(dim=-1, keepdim=True) # Sum over seq_len, Shape: [b, 1] return (squared_diff_sum / num_tokens).mean() - + def compute_retain_loss(self, model, retain_inputs): retain_loss = 0.0 - + if self.retain_loss_type == "EMBED_DIFF": - model_retain_activations, _ = self.forward_with_cache(model, retain_inputs, module=self.model_module, no_grad=False) - ref_retain_activations, _ = self.forward_with_cache(self.ref_model, retain_inputs, module=self.ref_module, no_grad=True) - mask = (retain_inputs['labels'] != -100) # Shape: [b, s] - retain_loss = self.compute_activation_loss(model_retain_activations, ref_retain_activations.to(model_retain_activations.device), mask) + model_retain_activations, _ = self.forward_with_cache( + model, retain_inputs, module=self.model_module, no_grad=False + ) + ref_retain_activations, _ = self.forward_with_cache( + self.ref_model, retain_inputs, module=self.ref_module, no_grad=True + ) + mask = retain_inputs["labels"] != -100 # Shape: [b, s] + retain_loss = self.compute_activation_loss( + model_retain_activations, + ref_retain_activations.to(model_retain_activations.device), + mask, + ) else: retain_loss = super().compute_retain_loss(model, retain_inputs) return retain_loss @@ -121,14 +144,22 @@ def compute_loss(self, model, inputs, return_outputs=False): "labels": forget_inputs["labels"], } - model_forget_activations, forget_outputs = self.forward_with_cache(model, forget_inputs, self.model_module, no_grad=False) + model_forget_activations, forget_outputs = self.forward_with_cache( + model, forget_inputs, self.model_module, no_grad=False + ) # If multiple datasets or concepts need unlearning, pass the control vector during processing; otherwise, default to a random vector during training. 
- control_vec = forget_inputs.get("control_vec", self.get_control_vector(model_forget_activations.shape[-1])) - control_vec = control_vec.to(dtype=model_forget_activations.dtype, device=model_forget_activations.device) + control_vec = forget_inputs.get( + "control_vec", self.get_control_vector(model_forget_activations.shape[-1]) + ) + control_vec = control_vec.to( + dtype=model_forget_activations.dtype, device=model_forget_activations.device + ) control_vec = control_vec.expand_as(model_forget_activations) - mask = (forget_inputs['labels'] != -100) # Shape: [b, s] - forget_loss = self.compute_activation_loss(model_forget_activations, control_vec, mask) - + mask = forget_inputs["labels"] != -100 # Shape: [b, s] + forget_loss = self.compute_activation_loss( + model_forget_activations, control_vec, mask + ) + retain_inputs = inputs["retain"] retain_inputs = { "input_ids": retain_inputs["input_ids"], @@ -136,7 +167,7 @@ def compute_loss(self, model, inputs, return_outputs=False): "labels": retain_inputs["labels"], } retain_loss = self.compute_retain_loss(model=model, retain_inputs=retain_inputs) - + loss = self.gamma * forget_loss + self.alpha * retain_loss return (loss, forget_outputs) if return_outputs else loss From 26aa294a1e262f7803d48f0eef49935b9a9cf83b Mon Sep 17 00:00:00 2001 From: Anmol Mekala <49127549+molereddy@users.noreply.github.com> Date: Mon, 14 Apr 2025 13:26:56 -0400 Subject: [PATCH 9/9] Fix documentation and some miscellaneous things (#13) * Re-formatting + more badges * Update and fix docs * Make error msg accurate * handle lack of flash-attn flag better * Document more hydra features * update example exp configs to match latest supported metrics * Change HF logo * Simplify eval exp cfg dump * testing push workflows * Add workflow test branch * update workflow path again * Reformat badges to fix blue line issue * Fix div * revert change to tests build path --- .github/workflows/tests.yml | 10 +- README.md | 42 +++-- 
community/benchmarks/template/README.md | 2 +- community/methods/template/README.md | 2 +- configs/experiment/examples/muse_unlearn.yaml | 150 ++++++++++++++---- configs/experiment/examples/tofu_eval.yaml | 118 +++++++++----- docs/experiments.md | 36 +---- docs/hydra.md | 27 +++- src/model/__init__.py | 2 +- src/trainer/base.py | 2 +- 10 files changed, 251 insertions(+), 140 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f1b0afa..5003e56 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,11 +1,11 @@ name: tests on: - # push: - # paths: - # - "**.py" - # - "requirements.txt" - # - ".github/workflows/*.yml" + push: + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" pull_request: paths: - "**.py" diff --git a/README.md b/README.md index 3138535..4868757 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,12 @@

An easily extensible framework unifying LLM unlearning evaluation benchmarks.

- - Build Status - - - Hugging Face - - - GitHub Repo stars - + GitHub Repo stars + Build Status + HuggingFace 🤗 + GitHub repo size + GitHub top language + License: MIT
@@ -30,7 +27,7 @@ We invite the LLM unlearning community to collaborate by adding new benchmarks, ### 📢 Updates #### [Apr 6, 2025] -⚠️⚠️ **IMPORTANT:** Be sure to run `python setup_data.py` immediately after merging the latest version. This is required to refresh the downloaded eval log files and ensure they're compatible with the latest evaluation metrics. +🚨🚨 **IMPORTANT:** 🚨🚨 Be sure to run `python setup_data.py` immediately after merging the latest version. This is required to refresh the downloaded eval log files and ensure they're compatible with the latest evaluation metrics. - **More Metrics!** Added 6 Membership Inference Attacks (MIA) (LOSS, ZLib, Reference, GradNorm, MinK, and MinK++), along with Extraction Strength (ES) and Exact Memorization (EM) as additional evaluation metrics. - **More TOFU Evaluations!** Now includes a holdout set and supports MIA attack-based evaluation. You can now compute MUSE's privleak on TOFU. - **More Documentation!** [`docs/links.md`](docs/links.md) contains resources for each of the implemented features and other useful LLM unlearning resources. @@ -89,13 +86,13 @@ We provide several variants for each of the components in the unlearning pipelin ## ⚡ Quickstart ```bash -# environment setup +# Environment setup conda create -n unlearning python=3.11 conda activate unlearning pip install . pip install --no-build-isolation flash-attn==2.6.3 -# data setup +# Data setup python setup_data.py # saves/eval now contains evaluation results of the uploaded models # Downloads log files with metric eval results (incl retain model logs) from the models # used in the supported benchmarks. @@ -175,7 +172,7 @@ For more in-depth information on specific aspects of the framework, refer to the | [`docs/contributing.md`](docs/contributing.md) | Instructions on how to add new methods, benchmarks, components such as trainers, benchmarks, metrics, models, datasets, etc.
| | [`docs/evaluation.md`](docs/evaluation.md) | Detailed instructions on creating and running evaluation metrics and benchmarks. | | [`docs/experiments.md`](docs/experiments.md) | Guide on running experiments in various configurations and settings, including distributed training, fine-tuning, and overriding arguments. | -| [`docs/hydra.md`](docs/hydra.md) | Explanation of the Hydra features used in configuration management for experiments. | +| [`docs/hydra.md`](docs/hydra.md) | A short tutorial on Hydra features. Hydra is the configuration management package we use extensively. | | [`community/leaderboard.md`](community/leaderboard.md) | Reference results from various unlearning methods run using this framework on TOFU and MUSE benchmarks. | | [`docs/links.md`](docs/links.md) | List of all links to the research papers or other sources the implemented features are sourced from. | | [`docs/repro.md`](docs/repro.md) | Results are provided solely for reproducibility purposes, without any parameter tuning.
| @@ -193,26 +190,25 @@ If you use OpenUnlearning in your research, please cite OpenUnlearning and the b ```bibtex @misc{openunlearning2025, - title={OpenUnlearning: A Unified Framework for LLM Unlearning Benchmarks}, + title={{OpenUnlearning}: A Unified Framework for LLM Unlearning Benchmarks}, author={Dorna, Vineeth and Mekala, Anmol and Zhao, Wenlong and McCallum, Andrew and Kolter, J Zico and Maini, Pratyush}, year={2025}, howpublished={\url{https://github.com/locuslab/open-unlearning}}, note={Accessed: February 27, 2025} } @inproceedings{maini2024tofu, - title={TOFU: A Task of Fictitious Unlearning for LLMs}, + title={{TOFU}: A Task of Fictitious Unlearning for LLMs}, author={Maini, Pratyush and Feng, Zhili and Schwarzschild, Avi and Lipton, Zachary Chase and Kolter, J Zico}, booktitle={First Conference on Language Modeling}, year={2024} } -@article{shi2024muse, - title={MUSE: Machine Unlearning Six-Way Evaluation for Language Models}, +@inproceedings{ + shi2025muse, + title={{MUSE}: Machine Unlearning Six-Way Evaluation for Language Models}, author={Weijia Shi and Jaechan Lee and Yangsibo Huang and Sadhika Malladi and Jieyu Zhao and Ari Holtzman and Daogao Liu and Luke Zettlemoyer and Noah A. Smith and Chiyuan Zhang}, - year={2024}, - eprint={2407.06460}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2407.06460}, + booktitle={The Thirteenth International Conference on Learning Representations}, + year={2025}, + url={https://openreview.net/forum?id=TArmA033BU} } ``` @@ -231,6 +227,4 @@ This project is licensed under the MIT License. 
See the [`LICENSE`](LICENSE) fil --- -### Star History - [![Star History Chart](https://api.star-history.com/svg?repos=locuslab/open-unlearning&type=Date)](https://www.star-history.com/#locuslab/open-unlearning&Date) diff --git a/community/benchmarks/template/README.md b/community/benchmarks/template/README.md index 855952f..15ec35b 100644 --- a/community/benchmarks/template/README.md +++ b/community/benchmarks/template/README.md @@ -26,7 +26,7 @@ Please include the experimental setup for the baselines - [ ] **Hyperparameters & Search Space:** Specify key hyperparameters, their search ranges, number of trials etc. - [ ] **Computational Setup:** Mention the type and number of GPUs used. -- [ ] **DeepSpeed Configuration:** If any modifications were made to the default DeepSpeed config, specify them here. (You may include the config as a code block.) +- [ ] **DeepSpeed Configuration** (if used): If any modifications were made to the default DeepSpeed config, specify them here. (You may include the config as a code block.) - [ ] **Other Details:** Any additional setup details crucial for reproducing your method. To replicate your results, provide a `run.sh` script that contains all necessary commands to reproduce the final results. Ensure the script is well-documented. diff --git a/community/methods/template/README.md b/community/methods/template/README.md index 7facb01..6c77875 100644 --- a/community/methods/template/README.md +++ b/community/methods/template/README.md @@ -11,7 +11,7 @@ Please include the experimental setup such as - [ ] **Hyperparameters & Search Space:** Specify key hyperparameters, their search ranges, number of trials etc. - [ ] **Computational Setup:** Mention the type and number of GPUs used. -- [ ] **DeepSpeed Configuration:** If any modifications were made to the default DeepSpeed config, specify them here. (You may include the config as a code block.) 
+- [ ] **DeepSpeed Configuration** (if used): If any modifications were made to the default DeepSpeed config, specify them here. (You may include the config as a code block.) - [ ] **Other Details:** Any additional setup details crucial for reproducing your method. # Results diff --git a/configs/experiment/examples/muse_unlearn.yaml b/configs/experiment/examples/muse_unlearn.yaml index 07c6d12..0e6b2b6 100644 --- a/configs/experiment/examples/muse_unlearn.yaml +++ b/configs/experiment/examples/muse_unlearn.yaml @@ -22,13 +22,15 @@ trainer: per_device_train_batch_size: 4 per_device_eval_batch_size: 16 gradient_accumulation_steps: 8 - learning_rate: 1.0e-05 + learning_rate: 3.0e-05 bf16: true bf16_full_eval: true logging_steps: 5 output_dir: ${paths.output_dir} logging_dir: ${trainer.args.output_dir}/logs report_to: tensorboard + ddp_find_unused_parameters: None + gradient_checkpointing: false optim: paged_adamw_32bit save_strategy: 'no' save_only_model: true @@ -53,22 +55,20 @@ data: args: hf_args: path: muse-bench/MUSE-News - name: train + name: raw split: ${forget_split} text_key: text - max_length: 128 - insert_space: true + max_length: 2048 retain: MUSE_retain: handler: PretrainingDataset args: hf_args: path: muse-bench/MUSE-News - name: train + name: raw split: ${retain_split} text_key: text - max_length: 128 - insert_space: true + max_length: 2048 anchor: forget collator: DataCollatorForSupervisedDataset: @@ -119,64 +119,144 @@ eval: handler: rouge rouge_type: rougeL_f1 batch_size: 16 + retain_knowmem_ROUGE: + datasets: + MUSE_retain_knowmem: + handler: QADataset + args: + hf_args: + path: muse-bench/MUSE-${eval.muse.data_split} + name: knowmem + split: retain_qa + few_shot_dataset_hf_args: + path: muse-bench/MUSE-${eval.muse.data_split} + name: knowmem + split: retain_qa_icl + question_key: question + answer_key: answer + max_length: 512 + predict_with_generate: true + collators: + DataCollatorForSupervisedDataset: + handler: DataCollatorForSupervisedDataset 
+ args: + padding_side: left + index: index + generation_args: + do_sample: false + top_p: null + temperature: null + max_new_tokens: 32 + use_cache: true + stopwords: + - ' + + + ' + - ' + + Question' + - 'Question:' + handler: rouge + rouge_type: rougeL_f1 + batch_size: 16 + forget_verbmem_ROUGE: + datasets: + MUSE_forget_verbmem: + handler: CompletionDataset + args: + hf_args: + path: muse-bench/MUSE-${eval.muse.data_split} + name: verbmem + split: forget + prefix_key: prompt + text_key: gt + max_length: 2048 + insert_space: true + predict_with_generate: true + collators: + DataCollatorForSupervisedDataset: + handler: DataCollatorForSupervisedDataset + args: + padding_side: left + index: index + generation_args: + do_sample: false + top_p: null + temperature: null + max_new_tokens: 128 + use_cache: true + handler: rouge + rouge_type: rougeL_f1 + batch_size: 8 privleak: pre_compute: - forget_minKpc_neg_logprob: + mia_min_k: datasets: - MUSE_forget_privleak: - handler: PretrainingDataset + MUSE_MIA_holdout: + access_key: holdout + handler: CompletionDataset args: hf_args: path: muse-bench/MUSE-${eval.muse.data_split} name: privleak - split: forget + split: holdout prefix_key: prompt text_key: text - collators: - DataCollatorForSupervisedDataset: - handler: DataCollatorForSupervisedDataset - args: - padding_side: right - index: index - handler: minKpc_negative_logprob - batch_size: 8 - k: 0.4 - access_key: forget - holdout_minKpc_neg_logprob: - datasets: - MUSE_holdout_privleak: - handler: PretrainingDataset + max_length: 2048 + MUSE_MIA_forget: + access_key: forget + handler: CompletionDataset args: hf_args: path: muse-bench/MUSE-${eval.muse.data_split} name: privleak - split: holdout + split: forget prefix_key: prompt text_key: text + max_length: 2048 collators: DataCollatorForSupervisedDataset: handler: DataCollatorForSupervisedDataset args: padding_side: right index: index - handler: minKpc_negative_logprob batch_size: 8 + handler: mia_min_k k: 0.4 - 
access_key: holdout + access_key: forget reference_logs: retain_model_logs: path: ${eval.muse.retain_logs_path} include: - forget_minKpc_neg_logprob: + mia_min_k: access_key: retain - holdout_minKpc_neg_logprob: - access_key: holdout handler: privleak ref_value: 0.5 + extraction_strength: + datasets: + MUSE_forget_verbmem: + handler: CompletionDataset + args: + hf_args: + path: muse-bench/MUSE-${eval.muse.data_split} + name: verbmem + split: forget + prefix_key: prompt + text_key: gt + max_length: 2048 + insert_space: true + collators: + DataCollatorForSupervisedDataset: + handler: DataCollatorForSupervisedDataset + args: + padding_side: right + index: index + handler: extraction_strength + batch_size: 8 handler: MUSEEvaluator - device: cuda output_dir: ${paths.output_dir} - overwrite: false + overwrite: true data_split: ${data_split} retain_logs_path: ${retain_logs_path} paths: @@ -188,6 +268,6 @@ paths: data_split: News forget_split: forget retain_split: retain1 -retain_logs_path: saves/eval/muse_news_retain/MUSE_EVAL.json -task_name: llama2_news_NPO +retain_logs_path: saves/eval/muse_Llama-2-7b-hf_News_retrain/MUSE_EVAL.json +task_name: muse_npo_unlearn mode: unlearn diff --git a/configs/experiment/examples/tofu_eval.yaml b/configs/experiment/examples/tofu_eval.yaml index c43c8c4..0100d79 100644 --- a/configs/experiment/examples/tofu_eval.yaml +++ b/configs/experiment/examples/tofu_eval.yaml @@ -1,22 +1,90 @@ model: model_args: - device_map: auto - pretrained_model_name_or_path: locuslab/tofu_ft_llama2-7b + device_map: cuda + pretrained_model_name_or_path: open-unlearning/tofu_Llama-3.2-1B-Instruct_full attn_implementation: flash_attention_2 torch_dtype: bfloat16 tokenizer_args: - pretrained_model_name_or_path: locuslab/tofu_ft_llama2-7b + pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct template_args: - apply_chat_template: false - user_start_tag: '[INST] ' - user_end_tag: ' [/INST]' - asst_start_tag: '' - asst_end_tag: '' + apply_chat_template: 
true + system_prompt: You are a helpful assistant. + system_prompt_with_special_tokens: '<|begin_of_text|><|start_header_id|>system<|end_header_id|> + + + You are a helpful assistant.<|eot_id|>' + user_start_tag: '<|start_header_id|>user<|end_header_id|> + + + ' + user_end_tag: <|eot_id|> + asst_start_tag: '<|start_header_id|>assistant<|end_header_id|> + + + ' + asst_end_tag: <|eot_id|> mode: eval -task_name: eval +task_name: SAMPLE_EVAL +seed: 0 eval: tofu: metrics: + forget_quality: + pre_compute: + forget_truth_ratio: + pre_compute: + forget_Q_A_PARA_Prob: + datasets: + TOFU_QA_forget_para: + handler: QADataset + args: + hf_args: + name: ${eval.tofu.forget_split}_perturbed + split: train + path: locuslab/TOFU + question_key: question + answer_key: paraphrased_answer + max_length: 512 + collators: + DataCollatorForSupervisedDataset: + handler: DataCollatorForSupervisedDataset + args: + padding_side: right + index: index + handler: probability + batch_size: 32 + access_key: correct + forget_Q_A_PERT_Prob: + datasets: + TOFU_QA_forget_pert: + handler: QADataset + args: + hf_args: + name: ${eval.tofu.forget_split}_perturbed + split: train + path: locuslab/TOFU + question_key: question + answer_key: perturbed_answer + max_length: 512 + collators: + DataCollatorForSupervisedDataset: + handler: DataCollatorForSupervisedDataset + args: + padding_side: right + index: index + handler: probability + batch_size: 32 + access_key: wrong + handler: truth_ratio + aggregator: closer_to_1_better + access_key: forget + reference_logs: + retain_model_logs: + path: ${eval.tofu.retain_logs_path} + include: + forget_truth_ratio: + access_key: retain + handler: ks_test forget_Q_A_Prob: datasets: TOFU_QA_forget: @@ -37,38 +105,11 @@ eval: index: index handler: probability batch_size: 32 - forget_Q_A_ROUGE: - datasets: - TOFU_QA_forget: - handler: QADataset - args: - hf_args: - name: ${eval.tofu.forget_split} - split: train - path: locuslab/TOFU - question_key: question - answer_key: 
answer - max_length: 512 - predict_with_generate: true - collators: - DataCollatorForSupervisedDataset: - handler: DataCollatorForSupervisedDataset - args: - padding_side: left - index: index - generation_args: - do_sample: false - top_p: null - temperature: null - max_new_tokens: 200 - use_cache: true - handler: rouge - rouge_type: rougeL_recall - batch_size: 32 handler: TOFUEvaluator output_dir: ${paths.output_dir} overwrite: false forget_split: ${forget_split} + holdout_split: ${holdout_split} retain_logs_path: ${retain_logs_path} paths: root_dir: . @@ -77,4 +118,5 @@ paths: output_dir: ${paths.root_dir}/saves/${mode}/${task_name} work_dir: ${hydra:runtime.cwd} forget_split: forget10 -retain_logs_path: null +holdout_split: holdout10 +retain_logs_path: saves/eval/tofu_Llama-3.2-1B-Instruct_retain90/TOFU_EVAL.json diff --git a/docs/experiments.md b/docs/experiments.md index 4aa1462..728e61b 100644 --- a/docs/experiments.md +++ b/docs/experiments.md @@ -63,7 +63,7 @@ paths.output_dir=saves/unlearn/NPO/evals > [!NOTE] -The unlearning experiments support evaluation during the unlearning finetuning. But this is supported only on a single GPU When multiple GPUs are used to train, checkpoints must be stored and evaluated after training. +The unlearning experiments support evaluation during the unlearning finetuning, but only when a single accelerator process is used. When multiple accelerator processes are used to train, checkpoints must be stored and evaluated after training. --- @@ -74,29 +74,6 @@ To understand the structure of an evaluation config and the kind of available pa To understand the structure of an unlearning config and the kind of available parameters for overriding, refer to: [`configs/experiment/examples/muse_unlearn.yaml`](../configs/experiment/examples/muse_unlearn.yaml). The following tables list the most commonly used arguments while running experiments. - ###

Model Settings

@@ -234,11 +211,9 @@ python src/train.py --config-name=train.yaml experiment=finetune/tofu/default \ trainer.args.learning_rate=5e-5 task_name=llama3.2-1B_finetune_example ``` - - ## Distributed Training -Distributed training configurations enable scaling experiments across multiple devices or nodes. In most cases, default distributed settings from [`configs/accelerate/default_config.yaml`](../configs/accelerate/default_config.yaml) are sufficient. You can run distributed training with a default command such as: +Distributed training configurations enable scaling experiments across multiple devices or nodes. In most cases, default distributed settings from [`configs/accelerate/default_config.yaml`](../configs/accelerate/default_config.yaml) are sufficient. You can run distributed training with the below command that uses DeepSpeed for distributed training (which is our default setup): ```bash CUDA_VISIBLE_DEVICES=0,1 accelerate launch \ @@ -246,9 +221,12 @@ CUDA_VISIBLE_DEVICES=0,1 accelerate launch \ src/train.py --config-name=unlearn.yaml experiment=unlearn/muse/default.yaml task_name=DISTRIBUTED_TRAIN ``` +You may also simply run `CUDA_VISIBLE_DEVICES=0,1,.. python ...` to leverage Accelerate's DDP or model parallel. For model parallel you can use `device_map="auto"` in the `model_args` while loading the model. + > [!CAUTION] -> Evaluation runs are designed to work only a single GPU (this includes running evaluation during training). To run an evaluation job, modify your command to make only one GPU visible (assuming one GPU is enough for inference), as shown below +> Train runs using multiple accelerate processes will not be able to run evaluations during training. 
To achieve this, you may want to use DDP/model parallel (see #94) or use a single GPU to run the evaluation code directly on a saved model checkpoint like below ```bash -CUDA_VISIBLE_DEVICES=0 python src/eval.py experiment=eval/muse/default.yaml task_name=SAMPLE_EVAL +CUDA_VISIBLE_DEVICES=0 python src/eval.py experiment=eval/muse/default.yaml task_name=SAMPLE_EVAL \ +model.model_args.pretrained_model_name_or_path=saves/unlearn/muse_unlearn_exp ``` diff --git a/docs/hydra.md b/docs/hydra.md index 1ce15e1..6e2b080 100644 --- a/docs/hydra.md +++ b/docs/hydra.md @@ -10,8 +10,7 @@ We use this config file for illustration, from [`configs/experiment/unlearn/muse defaults: - override /model: Llama-2-7b-hf # loads from model/Llama-2-7b-hf.yaml into the model attribute - override /trainer: GradAscent # loads from trainer/GradAscent.yaml into the trainer attribute -- override /data: unlearn # loads from data/unlearn.yaml into the data attribute -# , setting up data structure for loading data during unlearning +- override /data: unlearn # loads from data/unlearn.yaml into the "data" attribute, setting up data structures for loading datasets during unlearning - override /eval: muse # loads MUSE evaluation suite from eval/muse.yaml into the eval attribute # define variables @@ -57,6 +56,7 @@ trainer: task_name: ??? # ??? raises and error if this attribute is not set ``` + - **Structure & Attribute Access:** Configs are written in YAML and structured hierarchically like a dictionary. Attributes are accessed using dot notation: In code `cfg.model.args.learning_rate`, in command-line: `model.args.learning_rate=1e-5`. - **Defaults & Overrides:** Configs are files are included in one another using `defaults` and `override` commands.
@@ -72,9 +72,26 @@ task_name=unlearn_muse_simnpo For example, refer [`configs/eval/muse_metrics/forget_knowmem_ROUGE.yaml`](../configs/eval/muse_metrics/forget_knowmem_ROUGE.yaml) -- **Variable Substitution:** Variables are defined once and reused using the `${}` syntax: +- **Variable Substitution:** Variables are defined once and reused using the `${}` syntax. +- **Adding New Attributes with `+`:** Use the `+` prefix to add attributes that are not already in the config. For example, to add a new argument to the trainer: +```bash +python src/train.py experiment=unlearn/muse/default +trainer.args.my_new_arg=10 +``` -To understand the structure of an evaluation config and the available parameters for overriding, refer to: [`configs/experiment/examples/tofu_eval.yaml`](../configs/experiment/examples/tofu_eval.yaml). +- **Attribute Removal with `~`:** You can remove an attribute from the config at runtime using the tilde `~`. For example, to remove flash attention setting: +```bash +python src/train.py experiment=unlearn/muse/default ~model.model_args.attn_implementation +``` +> [!NOTE] +> In `zsh`, you must **quote** or **escape** the `~` to avoid it being misinterpreted as a home directory: e.g.: +```bash +python src/train.py \~model.model_args.attn_implementation +python src/train.py "~model.model_args.attn_implementation" +``` +> [!NOTE] +> Hydra uses PyYAML to handle yaml files and transform inputs while giving config inputs. This handles cases like converting `true` to `True` -To understand the structure of an unlearning config and the available parameters for overriding, refer to: [`configs/experiment/examples/muse_unlearn.yaml`](../configs/experiment/examples/muse_unlearn.yaml). 
\ No newline at end of file +Refer to the following for config structures and overridable parameters: +- Evaluation: [`configs/experiment/examples/tofu_eval.yaml`](../configs/experiment/examples/tofu_eval.yaml) +- Unlearning: [`configs/experiment/examples/muse_unlearn.yaml`](../configs/experiment/examples/muse_unlearn.yaml) \ No newline at end of file diff --git a/src/model/__init__.py b/src/model/__init__.py index 4143cfc..0ccc9f4 100644 --- a/src/model/__init__.py +++ b/src/model/__init__.py @@ -13,7 +13,7 @@ def get_dtype(model_args): with open_dict(model_args): torch_dtype = model_args.pop("torch_dtype", None) - if model_args["attn_implementation"] == "flash_attention_2": + if model_args.get("attn_implementation", None) == "flash_attention_2": # This check handles https://github.com/Dao-AILab/flash-attention/blob/7153673c1a3c7753c38e4c10ef2c98a02be5f778/flash_attn/flash_attn_triton.py#L820 # If you want to run at other precisions consider running "training or inference using # Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` diff --git a/src/trainer/base.py b/src/trainer/base.py index ddda956..c9cfdce 100644 --- a/src/trainer/base.py +++ b/src/trainer/base.py @@ -47,7 +47,7 @@ def evaluate( self.log(eval_metrics) else: logger.warning( - "Custom evaluator can be run with this Trainer only on a single GPU" + "Custom evaluator can be run with this Trainer only when a single accelerator process is running." ) return eval_metrics