From 54d3560daf1dc50a390c722e5175aab62333d1ca Mon Sep 17 00:00:00 2001 From: Anmol Mekala <49127549+molereddy@users.noreply.github.com> Date: Sat, 1 Mar 2025 09:13:50 -0500 Subject: [PATCH 1/6] Fix hyperlinks in README (#2) * testing commit * Fixes * cleanup --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index dce38e5..a04d486 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ We provide several variants for each of the components in the unlearning pipelin ## 📌 Table of Contents - 📖 [Overview](#-overview) -- 🗃️ [Available Components](#-available-components) +- 🗃️ [Available Components](#%EF%B8%8F-available-components) - ⚡ [Quickstart](#-quickstart) - 🛠️ [Environment Setup](#-environment-setup) - 💾 [Data Setup](#-data-setup) @@ -56,7 +56,7 @@ We provide several variants for each of the components in the unlearning pipelin - ➕ [How to Add New Components](#-how-to-add-new-components) - 📚 [Further Documentation](#-further-documentation) - 🔗 [Support & Contributors](#-support--contributors) -- 📝 [Citing this work](#-citating-this-work) +- 📝 [Citing this work](#-citing-this-work) - 🤝 [Acknowledgements](#-acknowledgements) - 📄 [License](#-license) @@ -198,7 +198,7 @@ If you use OpenUnlearning in your research, please cite: --- -### 🤝 Acknowledgments +### 🤝 Acknowledgements - This repo is inspired from [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory). - The [TOFU](https://github.com/locuslab/tofu) and [MUSE](https://github.com/jaechan-repo/muse_bench) benchmarks served as the foundation for our re-implementation. From 4c36e4f5a39d979280efd20452be7ba5ff54e40a Mon Sep 17 00:00:00 2001 From: Dornavineeth Date: Sun, 2 Mar 2025 18:37:19 +0000 Subject: [PATCH 2/6] Fixed DPO command --- scripts/tofu_unlearn.sh | 56 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/scripts/tofu_unlearn.sh b/scripts/tofu_unlearn.sh index a556bd1..eba38ff 100644 --- a/scripts/tofu_unlearn.sh +++ b/scripts/tofu_unlearn.sh @@ -9,12 +9,6 @@ models=( "Llama-3.2-3B-Instruct" "Llama-3.1-8B-Instruct" ) -trainers_experiments=( - "GradAscent unlearn/tofu/default.yaml" - "GradDiff unlearn/tofu/default.yaml" - "NPO unlearn/tofu/default.yaml" - "DPO unlearn/tofu/default.yaml" -) forget_retain_splits=( "forget01 retain99" "forget05 retain95" @@ -29,7 +23,57 @@ gradient_accumulation_steps=4 ########################################### Unlearn TOFU models ######################################################## ######################################################################################################################## +trainers_experiments=( + "GradAscent unlearn/tofu/default.yaml" + "GradDiff unlearn/tofu/default.yaml" + "NPO unlearn/tofu/default.yaml" +) +for split in "${forget_retain_splits[@]}"; do + forget_split=$(echo $split | cut -d' ' -f1) + retain_split=$(echo $split | cut -d' ' -f2) + for model in "${models[@]}"; do + for trainer_experiment in "${trainers_experiments[@]}"; do + trainer=$(echo $trainer_experiment | cut -d' ' -f1) + experiment=$(echo $trainer_experiment | cut -d' ' -f2) + + task_name=tofu_${model}_${forget_split}_${trainer} + model_path=open-unlearning/tofu_${model}_full + echo ${task_name}: Unlearning ${model_path} using ${trainer} + + # Unlearn + CUDA_VISIBLE_DEVICES=0,1 accelerate launch --config_file configs/accelerate/default_config.yaml --main_process_port $MASTER_PORT \ + src/train.py --config-name=unlearn.yaml \ + experiment=${experiment} \ + trainer=${trainer} \ + task_name=${task_name} \ + model=${model} \ + forget_split=${forget_split} \ + retain_split=${retain_split} \ + model.model_args.pretrained_model_name_or_path=${model_path} \ + retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json \ + trainer.args.per_device_train_batch_size=$per_device_train_batch_size \ + trainer.args.gradient_accumulation_steps=$gradient_accumulation_steps \ + trainer.args.ddp_find_unused_parameters=true \ + trainer.args.gradient_checkpointing=true + + # Eval + CUDA_VISIBLE_DEVICES=0 python src/eval.py \ + experiment=eval/tofu/default.yaml \ + forget_split=${forget_split} \ + model=${model} \ + task_name=${task_name} \ + model.model_args.pretrained_model_name_or_path=saves/unlearn/${task_name} \ + paths.output_dir=saves/unlearn/${task_name}/evals \ + retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json + done + done +done + + +trainers_experiments=( + "DPO unlearn/tofu/idk.yaml" +) for split in "${forget_retain_splits[@]}"; do forget_split=$(echo $split | cut -d' ' -f1) retain_split=$(echo $split | cut -d' ' -f2) From f7a69dee40cfe918e32250ce7c1ba7564205c9b4 Mon Sep 17 00:00:00 2001 From: Dornavineeth Date: Sun, 2 Mar 2025 18:50:47 +0000 Subject: [PATCH 3/6] download idk --- setup_data.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/setup_data.py b/setup_data.py index 48de0ad..358779c 100644 --- a/setup_data.py +++ b/setup_data.py @@ -1,8 +1,17 @@ from huggingface_hub import snapshot_download +# Setup retain model metrics snapshot_download( repo_id="open-unlearning/eval", allow_patterns="*.json", repo_type="dataset", local_dir="saves/eval", ) + +# Setup data +snapshot_download( + repo_id="open-unlearning/idk", + allow_patterns="*.jsonl", + repo_type="dataset", + local_dir="data", +) \ No newline at end of file From 332af36c4772eb1e836b767adbd102951693b60c Mon Sep 17 00:00:00 2001 From: Vineeth <48151992+Dornavineeth@users.noreply.github.com> Date: Sun, 2 Mar 2025 14:13:43 -0500 Subject: [PATCH 4/6] Revert "Dpo fix" --- scripts/tofu_unlearn.sh | 56 +++++------------------------------------ setup_data.py | 9 ------- 2 files changed, 6 insertions(+), 59 deletions(-) diff --git a/scripts/tofu_unlearn.sh b/scripts/tofu_unlearn.sh index eba38ff..a556bd1 100644 --- a/scripts/tofu_unlearn.sh +++ b/scripts/tofu_unlearn.sh @@ -9,6 +9,12 @@ models=( "Llama-3.2-3B-Instruct" "Llama-3.1-8B-Instruct" ) +trainers_experiments=( + "GradAscent unlearn/tofu/default.yaml" + "GradDiff unlearn/tofu/default.yaml" + "NPO unlearn/tofu/default.yaml" + "DPO unlearn/tofu/default.yaml" +) forget_retain_splits=( "forget01 retain99" "forget05 retain95" @@ -23,57 +29,7 @@ gradient_accumulation_steps=4 ########################################### Unlearn TOFU models ######################################################## ######################################################################################################################## -trainers_experiments=( - "GradAscent unlearn/tofu/default.yaml" - "GradDiff unlearn/tofu/default.yaml" - "NPO unlearn/tofu/default.yaml" -) -for split in "${forget_retain_splits[@]}"; do - forget_split=$(echo $split | cut -d' ' -f1) - retain_split=$(echo $split | cut -d' ' -f2) - for model in "${models[@]}"; do - for trainer_experiment in "${trainers_experiments[@]}"; do - trainer=$(echo $trainer_experiment | cut -d' ' -f1) - experiment=$(echo $trainer_experiment | cut -d' ' -f2) - - task_name=tofu_${model}_${forget_split}_${trainer} - model_path=open-unlearning/tofu_${model}_full - echo ${task_name}: Unlearning ${model_path} using ${trainer} - - # Unlearn - CUDA_VISIBLE_DEVICES=0,1 accelerate launch --config_file configs/accelerate/default_config.yaml --main_process_port $MASTER_PORT \ - src/train.py --config-name=unlearn.yaml \ - experiment=${experiment} \ - trainer=${trainer} \ - task_name=${task_name} \ - model=${model} \ - forget_split=${forget_split} \ - retain_split=${retain_split} \ - model.model_args.pretrained_model_name_or_path=${model_path} \ - retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json \ - trainer.args.per_device_train_batch_size=$per_device_train_batch_size \ - trainer.args.gradient_accumulation_steps=$gradient_accumulation_steps \ - trainer.args.ddp_find_unused_parameters=true \ - trainer.args.gradient_checkpointing=true - - # Eval - CUDA_VISIBLE_DEVICES=0 python src/eval.py \ - experiment=eval/tofu/default.yaml \ - forget_split=${forget_split} \ - model=${model} \ - task_name=${task_name} \ - model.model_args.pretrained_model_name_or_path=saves/unlearn/${task_name} \ - paths.output_dir=saves/unlearn/${task_name}/evals \ - retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json - done - done -done - - -trainers_experiments=( - "DPO unlearn/tofu/idk.yaml" -) for split in "${forget_retain_splits[@]}"; do forget_split=$(echo $split | cut -d' ' -f1) retain_split=$(echo $split | cut -d' ' -f2) diff --git a/setup_data.py b/setup_data.py index 358779c..48de0ad 100644 --- a/setup_data.py +++ b/setup_data.py @@ -1,17 +1,8 @@ from huggingface_hub import snapshot_download -# Setup retain model metrics snapshot_download( repo_id="open-unlearning/eval", allow_patterns="*.json", repo_type="dataset", local_dir="saves/eval", ) - -# Setup data -snapshot_download( - repo_id="open-unlearning/idk", - allow_patterns="*.jsonl", - repo_type="dataset", - local_dir="data", -) \ No newline at end of file From f468efb9eaa0c737e6e8b4e64abb411131ff7a99 Mon Sep 17 00:00:00 2001 From: Dornavineeth Date: Sun, 2 Mar 2025 19:22:21 +0000 Subject: [PATCH 5/6] download idk data --- setup_data.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/setup_data.py b/setup_data.py index 48de0ad..358779c 100644 --- a/setup_data.py +++ b/setup_data.py @@ -1,8 +1,17 @@ from huggingface_hub import snapshot_download +# Setup retain model metrics snapshot_download( repo_id="open-unlearning/eval", allow_patterns="*.json", repo_type="dataset", local_dir="saves/eval", ) + +# Setup data +snapshot_download( + repo_id="open-unlearning/idk", + allow_patterns="*.jsonl", + repo_type="dataset", + local_dir="data", +) \ No newline at end of file From ca8d5038b07ca8b8bbf0a71bf8a9a5502899f154 Mon Sep 17 00:00:00 2001 From: Dornavineeth Date: Sun, 2 Mar 2025 19:22:45 +0000 Subject: [PATCH 6/6] fix dpo experiment config --- scripts/tofu_unlearn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tofu_unlearn.sh b/scripts/tofu_unlearn.sh index a556bd1..1794c9b 100644 --- a/scripts/tofu_unlearn.sh +++ b/scripts/tofu_unlearn.sh @@ -13,7 +13,7 @@ trainers_experiments=( "GradAscent unlearn/tofu/default.yaml" "GradDiff unlearn/tofu/default.yaml" "NPO unlearn/tofu/default.yaml" - "DPO unlearn/tofu/default.yaml" + "DPO unlearn/tofu/idk.yaml" ) forget_retain_splits=( "forget01 retain99"