diff --git a/docs/source/api_ref_models.rst b/docs/source/api_ref_models.rst index b2d74022b1..e658de7294 100644 --- a/docs/source/api_ref_models.rst +++ b/docs/source/api_ref_models.rst @@ -99,7 +99,7 @@ To download the Llama3.1-405B-Instruct model: tune download meta-llama/Meta-Llama-3.1-405B-Instruct --ignore-patterns "original/consolidated*" --hf-token To download the Llama3 weights of the above models, you can instead download from `Meta-Llama-3-8B-Instruct` and -`Meta-Llama-3-70B-Instruct`. +`Meta-Llama-3-70B-Instruct`, and remove the ignore patterns flag. .. autosummary:: :toctree: generated/ @@ -217,7 +217,7 @@ To download the Qwen2.5 1.5B model, for example: .. code-block:: bash - tune download Qwen/Qwen2.5-1.5B-Instruct --output-dir /tmp/Qwen2_5-1_5B-Instruct --ignore-patterns None + tune download Qwen/Qwen2.5-1.5B-Instruct --output-dir /tmp/Qwen2_5-1_5B-Instruct .. autosummary:: :toctree: generated/ @@ -258,7 +258,7 @@ To download the Qwen2 1.5B model, for example: .. code-block:: bash - tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct --ignore-patterns None + tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct .. autosummary:: :toctree: generated/ @@ -283,7 +283,7 @@ To download the Phi-3 Mini 4k instruct model: .. code-block:: bash - tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --ignore-patterns None --hf-token + tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token .. autosummary:: :toctree: generated/ @@ -307,7 +307,7 @@ To download the Mistral 7B v0.1 model: .. code-block:: bash - tune download mistralai/Mistral-7B-v0.1 --output-dir /tmp/Mistral-7B-v0.1 --hf-token + tune download mistralai/Mistral-7B-v0.1 --output-dir /tmp/Mistral-7B-v0.1 --ignore-patterns "*.safetensors" --hf-token .. 
autosummary:: :toctree: generated/ diff --git a/recipes/configs/llama2/13B_full.yaml b/recipes/configs/llama2/13B_full.yaml index d577559305..fd7b7421c1 100644 --- a/recipes/configs/llama2/13B_full.yaml +++ b/recipes/configs/llama2/13B_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-13b-hf --output-dir /tmp/Llama-2-13b-hf --hf-token +# tune download meta-llama/Llama-2-13b-hf --output-dir /tmp/Llama-2-13b-hf --ignore-patterns "*.safetensors" --hf-token # # To launch on 4 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 4 full_finetune_distributed --config llama2/13B_full diff --git a/recipes/configs/llama2/13B_lora.yaml b/recipes/configs/llama2/13B_lora.yaml index 7a6fa600d2..2bae98471d 100644 --- a/recipes/configs/llama2/13B_lora.yaml +++ b/recipes/configs/llama2/13B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-13b-hf --output-dir /tmp/Llama-2-13b-hf --hf-token +# tune download meta-llama/Llama-2-13b-hf --output-dir /tmp/Llama-2-13b-hf --ignore-patterns "*.safetensors" --hf-token # # To launch on 4 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 4 lora_finetune_distributed --config llama2/13B_lora diff --git a/recipes/configs/llama2/13B_qlora_single_device.yaml b/recipes/configs/llama2/13B_qlora_single_device.yaml index a10285544a..62e74c4e62 100644 --- a/recipes/configs/llama2/13B_qlora_single_device.yaml +++ b/recipes/configs/llama2/13B_qlora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-13b-hf --output-dir /tmp/Llama-2-13b-hf --hf-token +# tune download meta-llama/Llama-2-13b-hf --output-dir /tmp/Llama-2-13b-hf --ignore-patterns "*.safetensors" --hf-token # # To 
launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config llama2/13B_qlora_single_device diff --git a/recipes/configs/llama2/70B_lora.yaml b/recipes/configs/llama2/70B_lora.yaml index a67bfc9da2..bf2a0817d0 100644 --- a/recipes/configs/llama2/70B_lora.yaml +++ b/recipes/configs/llama2/70B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-70b-hf --output-dir /tmp/Llama-2-70b-hf --hf-token +# tune download meta-llama/Llama-2-70b-hf --output-dir /tmp/Llama-2-70b-hf --ignore-patterns "*.safetensors" --hf-token # # This config needs 8 GPUs to run # # tune run --nproc_per_node 8 lora_finetune_distributed --config llama2/70B_lora diff --git a/recipes/configs/llama2/70B_qlora.yaml b/recipes/configs/llama2/70B_qlora.yaml index d04b7c6753..38444bf0c7 100644 --- a/recipes/configs/llama2/70B_qlora.yaml +++ b/recipes/configs/llama2/70B_qlora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-70b-hf --output-dir /tmp/Llama-2-70b-hf --hf-token +# tune download meta-llama/Llama-2-70b-hf --output-dir /tmp/Llama-2-70b-hf --ignore-patterns "*.safetensors" --hf-token # # This config needs 8 GPUs to run # # tune run --nproc_per_node 8 lora_finetune_distributed --config llama2/70B_qlora diff --git a/recipes/configs/llama2/7B_full.yaml b/recipes/configs/llama2/7B_full.yaml index 3031538137..7e69c8f5a6 100644 --- a/recipes/configs/llama2/7B_full.yaml +++ b/recipes/configs/llama2/7B_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --hf-token +# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" --hf-token # # To launch on 4 devices, run the 
following command from root: # tune run --nnodes 1 --nproc_per_node 4 full_finetune_distributed --config llama2/7B_full diff --git a/recipes/configs/llama2/7B_full_low_memory.yaml b/recipes/configs/llama2/7B_full_low_memory.yaml index 07514959db..d7ee50898e 100644 --- a/recipes/configs/llama2/7B_full_low_memory.yaml +++ b/recipes/configs/llama2/7B_full_low_memory.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --hf-token +# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" --hf-token # # The default config uses an optimizer from bitsandbytes. If you do not have it installed, # you can install it with diff --git a/recipes/configs/llama2/7B_lora.yaml b/recipes/configs/llama2/7B_lora.yaml index 8e64a3fc11..5bf21ccb2c 100644 --- a/recipes/configs/llama2/7B_lora.yaml +++ b/recipes/configs/llama2/7B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --hf-token +# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" --hf-token # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_lora diff --git a/recipes/configs/llama2/7B_lora_dpo.yaml b/recipes/configs/llama2/7B_lora_dpo.yaml index f3b827ae3b..abf1b43138 100644 --- a/recipes/configs/llama2/7B_lora_dpo.yaml +++ b/recipes/configs/llama2/7B_lora_dpo.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --hf-token +# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" 
--hf-token # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_dpo_distributed --config llama2/7B_lora_dpo diff --git a/recipes/configs/llama2/7B_lora_dpo_single_device.yaml b/recipes/configs/llama2/7B_lora_dpo_single_device.yaml index 6483219e9b..7543cb5d6f 100644 --- a/recipes/configs/llama2/7B_lora_dpo_single_device.yaml +++ b/recipes/configs/llama2/7B_lora_dpo_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --hf-token +# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" --hf-token # # To launch on a single device, run the following command from root: # tune run lora_dpo_single_device --config llama2/7B_lora_dpo_single_device diff --git a/recipes/configs/llama2/7B_lora_single_device.yaml b/recipes/configs/llama2/7B_lora_single_device.yaml index 481fed1a7e..4196cc5a59 100644 --- a/recipes/configs/llama2/7B_lora_single_device.yaml +++ b/recipes/configs/llama2/7B_lora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --hf-token +# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" --hf-token # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config llama2/7B_lora_single_device diff --git a/recipes/configs/llama2/7B_qat_full.yaml b/recipes/configs/llama2/7B_qat_full.yaml index 1208627f1c..15a3f000e4 100644 --- a/recipes/configs/llama2/7B_qat_full.yaml +++ b/recipes/configs/llama2/7B_qat_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-7b-hf 
--output-dir /tmp/Llama-2-7b-hf --hf-token +# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" --hf-token # # To launch on 4 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 4 qat_distributed --config llama2/7B_qat_full diff --git a/recipes/configs/llama2/7B_qlora.yaml b/recipes/configs/llama2/7B_qlora.yaml index 80cee9853c..667b94c376 100644 --- a/recipes/configs/llama2/7B_qlora.yaml +++ b/recipes/configs/llama2/7B_qlora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --hf-token +# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" --hf-token # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama2/7B_qlora diff --git a/recipes/configs/llama2/7B_qlora_single_device.yaml b/recipes/configs/llama2/7B_qlora_single_device.yaml index b1f119d7db..028265007e 100644 --- a/recipes/configs/llama2/7B_qlora_single_device.yaml +++ b/recipes/configs/llama2/7B_qlora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --hf-token +# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" --hf-token # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config llama2/7B_qlora_single_device diff --git a/recipes/configs/llama2/generation_v2.yaml b/recipes/configs/llama2/generation_v2.yaml index 7ce4e2c43d..7a9222862d 100644 --- a/recipes/configs/llama2/generation_v2.yaml +++ b/recipes/configs/llama2/generation_v2.yaml @@ -1,7 +1,7 @@ # Config for running the 
InferenceRecipe in generate_V2.py to generate output from an LLM # # This config assumes that you've run the following command before launching: -# tune download meta-llama/Llama-2-7b-chat-hf --output-dir /tmp/Llama-2-7b-chat-hf +# tune download meta-llama/Llama-2-7b-chat-hf --output-dir /tmp/Llama-2-7b-chat-hf --ignore-patterns "*.safetensors" # # To launch, run the following command: # tune run dev/generate_v2 --config llama2/generation_v2 diff --git a/recipes/configs/llama3/70B_full.yaml b/recipes/configs/llama3/70B_full.yaml index 7cffc3fce9..5878b2fd95 100644 --- a/recipes/configs/llama3/70B_full.yaml +++ b/recipes/configs/llama3/70B_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Meta-Llama-3-70B-Instruct --output-dir /tmp/Meta-Llama-3-70B-Instruct --ignore-patterns "original/consolidated*" --hf-token +# tune download meta-llama/Meta-Llama-3-70B-Instruct --output-dir /tmp/Meta-Llama-3-70B-Instruct --ignore-patterns "original/consolidated*" --hf-token # # To launch on 8 devices, run the following command from root: # tune run --nproc_per_node 8 full_finetune_distributed --config llama3/70B_full diff --git a/recipes/configs/llama3/8B_dora.yaml b/recipes/configs/llama3/8B_dora.yaml index ee7a8d07f6..276f303807 100644 --- a/recipes/configs/llama3/8B_dora.yaml +++ b/recipes/configs/llama3/8B_dora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token +# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --ignore-patterns "*.safetensors" --hf-token # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama3/8B_dora diff --git a/recipes/configs/llama3/8B_full.yaml 
b/recipes/configs/llama3/8B_full.yaml index 9a93d59061..a065fa9ece 100644 --- a/recipes/configs/llama3/8B_full.yaml +++ b/recipes/configs/llama3/8B_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token +# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --ignore-patterns "*.safetensors" --hf-token # # To launch on 4 devices, run the following command from root: # tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full diff --git a/recipes/configs/llama3/8B_full_single_device.yaml b/recipes/configs/llama3/8B_full_single_device.yaml index b3fee76295..a63845fc30 100644 --- a/recipes/configs/llama3/8B_full_single_device.yaml +++ b/recipes/configs/llama3/8B_full_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token +# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --ignore-patterns "*.safetensors" --hf-token # # The default config uses an optimizer from bitsandbytes. 
If you do not have it installed, # you can install it with diff --git a/recipes/configs/llama3/8B_lora.yaml b/recipes/configs/llama3/8B_lora.yaml index 3ced0899e4..f9cb8f9d95 100644 --- a/recipes/configs/llama3/8B_lora.yaml +++ b/recipes/configs/llama3/8B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token +# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --ignore-patterns "*.safetensors" --hf-token # # To launch on 2 devices, run the following command from root: # tune run --nproc_per_node 2 lora_finetune_distributed --config llama3/8B_lora diff --git a/recipes/configs/llama3/8B_lora_single_device.yaml b/recipes/configs/llama3/8B_lora_single_device.yaml index 4535758ac9..5ae3a0088a 100644 --- a/recipes/configs/llama3/8B_lora_single_device.yaml +++ b/recipes/configs/llama3/8B_lora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token +# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --ignore-patterns "*.safetensors" --hf-token # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config llama3/8B_lora_single_device diff --git a/recipes/configs/llama3/8B_qat_full.yaml b/recipes/configs/llama3/8B_qat_full.yaml index 274ee0eae0..49c5c7ee74 100644 --- a/recipes/configs/llama3/8B_qat_full.yaml +++ b/recipes/configs/llama3/8B_qat_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token +# tune download 
meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --ignore-patterns "*.safetensors" --hf-token # # To launch on 4 devices, run the following command from root: # tune run --nproc_per_node 4 qat_distributed --config llama3/8B_qat_full diff --git a/recipes/configs/llama3/8B_qdora_single_device.yaml b/recipes/configs/llama3/8B_qdora_single_device.yaml index 8eb1b5151c..823e0f75fe 100644 --- a/recipes/configs/llama3/8B_qdora_single_device.yaml +++ b/recipes/configs/llama3/8B_qdora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token +# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --ignore-patterns "*.safetensors" --hf-token # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config llama3/8B_qdora_single_device diff --git a/recipes/configs/llama3/8B_qlora_single_device.yaml b/recipes/configs/llama3/8B_qlora_single_device.yaml index 0c4ab423b8..76af71e432 100644 --- a/recipes/configs/llama3/8B_qlora_single_device.yaml +++ b/recipes/configs/llama3/8B_qlora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token +# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --ignore-patterns "*.safetensors" --hf-token # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config llama3/8B_qlora_single_device diff --git a/recipes/configs/mistral/7B_full.yaml b/recipes/configs/mistral/7B_full.yaml index e025cee824..23c82e1d71 100644 --- a/recipes/configs/mistral/7B_full.yaml +++ 
b/recipes/configs/mistral/7B_full.yaml @@ -7,7 +7,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download mistralai/Mistral-7B-v0.1 --hf-token --output-dir /tmp/Mistral-7B-v0.1 +# tune download mistralai/Mistral-7B-v0.1 --output-dir /tmp/Mistral-7B-v0.1 --ignore-patterns "*.safetensors" --hf-token # # Run this config on 4 GPUs using the following: # tune run --nnodes 1 --nproc_per_node 4 full_finetune_distributed --config mistral/7B_full diff --git a/recipes/configs/mistral/7B_full_low_memory.yaml b/recipes/configs/mistral/7B_full_low_memory.yaml index dbd8a9bae5..01de2f11ea 100644 --- a/recipes/configs/mistral/7B_full_low_memory.yaml +++ b/recipes/configs/mistral/7B_full_low_memory.yaml @@ -7,7 +7,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download mistralai/Mistral-7B-v0.1 --hf-token --output-dir /tmp/Mistral-7B-v0.1 +# tune download mistralai/Mistral-7B-v0.1 --output-dir /tmp/Mistral-7B-v0.1 --ignore-patterns "*.safetensors" --hf-token # # The default config uses an optimizer from bitsandbytes. 
If you do not have it installed, # you can install it with diff --git a/recipes/configs/mistral/7B_full_ppo_low_memory.yaml b/recipes/configs/mistral/7B_full_ppo_low_memory.yaml index e05be85ff6..310c9e5bcf 100644 --- a/recipes/configs/mistral/7B_full_ppo_low_memory.yaml +++ b/recipes/configs/mistral/7B_full_ppo_low_memory.yaml @@ -7,8 +7,8 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download weqweasdas/RM-Mistral-7B --output-dir /tmp/RM-Mistral-7B/ --ignore-patterns None -# tune download mistralai/Mistral-7B-Instruct-v0.2 --output-dir /tmp/Mistral-7B-Instruct-v0.2/ --hf-token HF_TOKEN +# tune download weqweasdas/RM-Mistral-7B --output-dir /tmp/RM-Mistral-7B/ +# tune download mistralai/Mistral-7B-Instruct-v0.2 --output-dir /tmp/Mistral-7B-Instruct-v0.2/ --ignore-patterns "*.safetensors" --hf-token # # You'll also need to ensure that {output_dir} exists beforehand, as checkpoints for policy and value models are saved in sub-folders. # The default config uses an optimizer from bitsandbytes. 
If you do not have it installed, diff --git a/recipes/configs/mistral/7B_lora.yaml b/recipes/configs/mistral/7B_lora.yaml index 30919c13e8..f637240b34 100644 --- a/recipes/configs/mistral/7B_lora.yaml +++ b/recipes/configs/mistral/7B_lora.yaml @@ -7,7 +7,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download mistralai/Mistral-7B-v0.1 --hf-token --output-dir /tmp/Mistral-7B-v0.1 +# tune download mistralai/Mistral-7B-v0.1 --output-dir /tmp/Mistral-7B-v0.1 --ignore-patterns "*.safetensors" --hf-token # # Run this config on 2 GPUs using the following: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config mistral/7B_lora diff --git a/recipes/configs/mistral/7B_lora_single_device.yaml b/recipes/configs/mistral/7B_lora_single_device.yaml index b6d2492bf1..c11cbe1ad2 100644 --- a/recipes/configs/mistral/7B_lora_single_device.yaml +++ b/recipes/configs/mistral/7B_lora_single_device.yaml @@ -7,7 +7,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download mistralai/Mistral-7B-v0.1 --hf-token --output-dir /tmp/Mistral-7B-v0.1 +# tune download mistralai/Mistral-7B-v0.1 --output-dir /tmp/Mistral-7B-v0.1 --ignore-patterns "*.safetensors" --hf-token # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config mistral/7B_lora_single_device diff --git a/recipes/configs/mistral/7B_qlora_single_device.yaml b/recipes/configs/mistral/7B_qlora_single_device.yaml index c0252fcb32..536f2efdf1 100644 --- a/recipes/configs/mistral/7B_qlora_single_device.yaml +++ b/recipes/configs/mistral/7B_qlora_single_device.yaml @@ -7,7 +7,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download mistralai/Mistral-7B-v0.1 --hf-token --output-dir /tmp/Mistral-7B-v0.1 +# tune download mistralai/Mistral-7B-v0.1 --output-dir /tmp/Mistral-7B-v0.1 --ignore-patterns 
"*.safetensors" --hf-token # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config mistral/7B_qlora_single_device diff --git a/recipes/configs/phi3/mini_full.yaml b/recipes/configs/phi3/mini_full.yaml index 1319ab816d..594ffdc916 100644 --- a/recipes/configs/phi3/mini_full.yaml +++ b/recipes/configs/phi3/mini_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --ignore-patterns None --hf-token +# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token # # Run this config on 4 GPUs using the following: # tune run --nproc_per_node 4 full_finetune_distributed --config phi3/mini_full diff --git a/recipes/configs/phi3/mini_full_low_memory.yaml b/recipes/configs/phi3/mini_full_low_memory.yaml index ad7e0f4046..05c1db379a 100644 --- a/recipes/configs/phi3/mini_full_low_memory.yaml +++ b/recipes/configs/phi3/mini_full_low_memory.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --ignore-patterns None --hf-token +# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token # # The default config uses an optimizer from bitsandbytes. 
If you do not have it installed, # you can install it with diff --git a/recipes/configs/phi3/mini_lora.yaml b/recipes/configs/phi3/mini_lora.yaml index 5547be21e0..0c13048119 100644 --- a/recipes/configs/phi3/mini_lora.yaml +++ b/recipes/configs/phi3/mini_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --ignore-patterns None --hf-token +# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi3/mini_lora diff --git a/recipes/configs/phi3/mini_lora_single_device.yaml b/recipes/configs/phi3/mini_lora_single_device.yaml index 533972b0e1..3aae4f2b6c 100644 --- a/recipes/configs/phi3/mini_lora_single_device.yaml +++ b/recipes/configs/phi3/mini_lora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --ignore-patterns None --hf-token +# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config phi3/mini_lora_single_device diff --git a/recipes/configs/phi3/mini_qlora_single_device.yaml b/recipes/configs/phi3/mini_qlora_single_device.yaml index e89bd1a542..f59a68a59d 100644 --- a/recipes/configs/phi3/mini_qlora_single_device.yaml +++ b/recipes/configs/phi3/mini_qlora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --ignore-patterns None --hf-token 
+# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config phi3/mini_qlora_single_device diff --git a/recipes/configs/qwen2/0.5B_full.yaml b/recipes/configs/qwen2/0.5B_full.yaml index ca5863c37c..84336894be 100644 --- a/recipes/configs/qwen2/0.5B_full.yaml +++ b/recipes/configs/qwen2/0.5B_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct # # To launch on 4 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 4 full_finetune_distributed --config qwen2/0.5B_full diff --git a/recipes/configs/qwen2/0.5B_full_single_device.yaml b/recipes/configs/qwen2/0.5B_full_single_device.yaml index 7e491216c1..8b60a17090 100644 --- a/recipes/configs/qwen2/0.5B_full_single_device.yaml +++ b/recipes/configs/qwen2/0.5B_full_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct # # To launch on a single device, run the following command from root: # tune run full_finetune_single_device --config qwen2/0.5B_full_single_device diff --git a/recipes/configs/qwen2/0.5B_lora.yaml b/recipes/configs/qwen2/0.5B_lora.yaml index 9f54c5fdbe..16e5955da3 100644 --- a/recipes/configs/qwen2/0.5B_lora.yaml +++ b/recipes/configs/qwen2/0.5B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2-0.5B-Instruct --output-dir 
/tmp/Qwen2-0.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2/0.5B_lora diff --git a/recipes/configs/qwen2/0.5B_lora_single_device.yaml b/recipes/configs/qwen2/0.5B_lora_single_device.yaml index e9907ec939..e54db398fb 100644 --- a/recipes/configs/qwen2/0.5B_lora_single_device.yaml +++ b/recipes/configs/qwen2/0.5B_lora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config qwen2/0.5B_lora_single_device diff --git a/recipes/configs/qwen2/1.5B_full.yaml b/recipes/configs/qwen2/1.5B_full.yaml index 812ea45b10..37b5c0a926 100644 --- a/recipes/configs/qwen2/1.5B_full.yaml +++ b/recipes/configs/qwen2/1.5B_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct # # To launch on 4 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 4 full_finetune_distributed --config qwen2/1.5B_full diff --git a/recipes/configs/qwen2/1.5B_full_single_device.yaml b/recipes/configs/qwen2/1.5B_full_single_device.yaml index 3b7642cf24..2acdfb3810 100644 --- a/recipes/configs/qwen2/1.5B_full_single_device.yaml +++ b/recipes/configs/qwen2/1.5B_full_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command 
before launching # this run: -# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct # # The default config uses an optimizer from bitsandbytes. If you do not have it installed, # you can install it with diff --git a/recipes/configs/qwen2/1.5B_lora.yaml b/recipes/configs/qwen2/1.5B_lora.yaml index d006b29cce..aea2f79e09 100644 --- a/recipes/configs/qwen2/1.5B_lora.yaml +++ b/recipes/configs/qwen2/1.5B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2/1.5B_lora diff --git a/recipes/configs/qwen2/1.5B_lora_single_device.yaml b/recipes/configs/qwen2/1.5B_lora_single_device.yaml index 1943be6cb9..2c23954be3 100644 --- a/recipes/configs/qwen2/1.5B_lora_single_device.yaml +++ b/recipes/configs/qwen2/1.5B_lora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config qwen2/1.5B_lora_single_device diff --git a/recipes/configs/qwen2/7B_full.yaml b/recipes/configs/qwen2/7B_full.yaml index d0a6726826..20d74346e1 100644 --- a/recipes/configs/qwen2/7B_full.yaml +++ b/recipes/configs/qwen2/7B_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this 
run: -# tune download Qwen/Qwen2-7B-Instruct --output-dir /tmp/Qwen2-7B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-7B-Instruct --output-dir /tmp/Qwen2-7B-Instruct # # To launch on 4 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 4 full_finetune_distributed --config qwen2/7B_full diff --git a/recipes/configs/qwen2/7B_full_single_device.yaml b/recipes/configs/qwen2/7B_full_single_device.yaml index 25e4a1b72b..cff3244b18 100644 --- a/recipes/configs/qwen2/7B_full_single_device.yaml +++ b/recipes/configs/qwen2/7B_full_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2-7B-Instruct --output-dir /tmp/Qwen2-7B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-7B-Instruct --output-dir /tmp/Qwen2-7B-Instruct # # The default config uses an optimizer from bitsandbytes. If you do not have it installed, # you can install it with diff --git a/recipes/configs/qwen2/7B_lora.yaml b/recipes/configs/qwen2/7B_lora.yaml index c853a7e39f..779e3fdc49 100644 --- a/recipes/configs/qwen2/7B_lora.yaml +++ b/recipes/configs/qwen2/7B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2-7B-Instruct --output-dir /tmp/Qwen2-7B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-7B-Instruct --output-dir /tmp/Qwen2-7B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2/7B_lora diff --git a/recipes/configs/qwen2/7B_lora_single_device.yaml b/recipes/configs/qwen2/7B_lora_single_device.yaml index 97204f8a1d..d8c576fc41 100644 --- a/recipes/configs/qwen2/7B_lora_single_device.yaml +++ b/recipes/configs/qwen2/7B_lora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this 
run: -# tune download Qwen/Qwen2-7B-Instruct --output-dir /tmp/Qwen2-7B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-7B-Instruct --output-dir /tmp/Qwen2-7B-Instruct # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config qwen2/7B_lora_single_device diff --git a/recipes/configs/qwen2/knowledge_distillation_distributed.yaml b/recipes/configs/qwen2/knowledge_distillation_distributed.yaml index aad21f1e86..d94f15c54e 100644 --- a/recipes/configs/qwen2/knowledge_distillation_distributed.yaml +++ b/recipes/configs/qwen2/knowledge_distillation_distributed.yaml @@ -3,8 +3,8 @@ # # This config assumes that you've run the following commands before launching KD: # First download the student and teacher models -# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct --ignore-patterns None -# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct +# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct # # You get better results using KD if the teacher model has already been fine-tuned on the target dataset: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2/1.5B_lora diff --git a/recipes/configs/qwen2/knowledge_distillation_single_device.yaml b/recipes/configs/qwen2/knowledge_distillation_single_device.yaml index 8246cf2e01..70c3496d0e 100644 --- a/recipes/configs/qwen2/knowledge_distillation_single_device.yaml +++ b/recipes/configs/qwen2/knowledge_distillation_single_device.yaml @@ -3,8 +3,8 @@ # # This config assumes that you've run the following commands before launching KD: # First download the student and teacher models -# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct --ignore-patterns None -# tune download Qwen/Qwen2-1.5B-Instruct --output-dir 
/tmp/Qwen2-1.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2-0.5B-Instruct --output-dir /tmp/Qwen2-0.5B-Instruct +# tune download Qwen/Qwen2-1.5B-Instruct --output-dir /tmp/Qwen2-1.5B-Instruct # # You get better results using KD if the teacher model has already been fine-tuned on the target dataset: # tune run lora_finetune_single_device --config qwen2/1.5B_lora_single_device diff --git a/recipes/configs/qwen2_5/0.5B_full.yaml b/recipes/configs/qwen2_5/0.5B_full.yaml index 775e5db2d1..1298c058e9 100644 --- a/recipes/configs/qwen2_5/0.5B_full.yaml +++ b/recipes/configs/qwen2_5/0.5B_full.yaml @@ -2,7 +2,7 @@ # using a Qwen2.5 0.5B model # # This config assumes that you've run the following command before launching: -# tune download Qwen/Qwen2.5-0.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-0.5B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nproc_per_node 2 full_finetune_distributed --config qwen2_5/0.5B_full diff --git a/recipes/configs/qwen2_5/0.5B_full_single_device.yaml b/recipes/configs/qwen2_5/0.5B_full_single_device.yaml index f668f8416b..39dfb2f8a0 100644 --- a/recipes/configs/qwen2_5/0.5B_full_single_device.yaml +++ b/recipes/configs/qwen2_5/0.5B_full_single_device.yaml @@ -2,7 +2,7 @@ # using a Qwen2.5 0.5B # # This config assumes that you've run the following command before launching: -# tune download Qwen/Qwen2.5-0.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-0.5B-Instruct # # To launch on a single device, run the following command from root: # tune run full_finetune_single_device --config qwen2_5/0.5B_full_single_device diff --git a/recipes/configs/qwen2_5/0.5B_lora.yaml b/recipes/configs/qwen2_5/0.5B_lora.yaml index 3507542663..50fe1a0a28 100644 --- a/recipes/configs/qwen2_5/0.5B_lora.yaml +++ b/recipes/configs/qwen2_5/0.5B_lora.yaml @@ -2,7 +2,7 @@ # using a Qwen2.5 0.5B model # # This config assumes that you've run the following command before launching: -# 
tune download Qwen/Qwen2.5-0.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-0.5B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nproc_per_node 2 lora_finetune_distributed --config qwen2_5/0.5B_lora diff --git a/recipes/configs/qwen2_5/0.5B_lora_single_device.yaml b/recipes/configs/qwen2_5/0.5B_lora_single_device.yaml index 8f4e309279..fa507e3414 100644 --- a/recipes/configs/qwen2_5/0.5B_lora_single_device.yaml +++ b/recipes/configs/qwen2_5/0.5B_lora_single_device.yaml @@ -2,7 +2,7 @@ # using a Qwen2.5 0.5B model # # This config assumes that you've run the following command before launching -# tune download Qwen/Qwen2.5-0.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-0.5B-Instruct # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config qwen2_5/0.5B_lora_single_device diff --git a/recipes/configs/qwen2_5/1.5B_full.yaml b/recipes/configs/qwen2_5/1.5B_full.yaml index 8eb535df2c..e0fb09c152 100644 --- a/recipes/configs/qwen2_5/1.5B_full.yaml +++ b/recipes/configs/qwen2_5/1.5B_full.yaml @@ -2,7 +2,7 @@ # using a Qwen2.5 1.5B model # # This config assumes that you've run the following command before launching: -# tune download Qwen/Qwen2.5-1.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-1.5B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nproc_per_node 2 full_finetune_distributed --config qwen2_5/1.5B_full diff --git a/recipes/configs/qwen2_5/1.5B_full_single_device.yaml b/recipes/configs/qwen2_5/1.5B_full_single_device.yaml index b0e860548d..480249631d 100644 --- a/recipes/configs/qwen2_5/1.5B_full_single_device.yaml +++ b/recipes/configs/qwen2_5/1.5B_full_single_device.yaml @@ -2,7 +2,7 @@ # using a Qwen2.5 1.5B # # This config assumes that you've run the following command before launching: -# tune download Qwen/Qwen2.5-1.5B-Instruct --ignore-patterns None +# tune download 
Qwen/Qwen2.5-1.5B-Instruct # # The default config uses an optimizer from bitsandbytes. If you do not have it installed, # you can install it with: diff --git a/recipes/configs/qwen2_5/1.5B_lora.yaml b/recipes/configs/qwen2_5/1.5B_lora.yaml index c44bea6124..8d530c3670 100644 --- a/recipes/configs/qwen2_5/1.5B_lora.yaml +++ b/recipes/configs/qwen2_5/1.5B_lora.yaml @@ -2,7 +2,7 @@ # using a Qwen2.5 1.5B model # # This config assumes that you've run the following command before launching: -# tune download Qwen/Qwen2.5-1.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-1.5B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nproc_per_node 2 lora_finetune_distributed --config qwen2_5/1.5B_lora diff --git a/recipes/configs/qwen2_5/1.5B_lora_single_device.yaml b/recipes/configs/qwen2_5/1.5B_lora_single_device.yaml index 168dea810c..e784066fe0 100644 --- a/recipes/configs/qwen2_5/1.5B_lora_single_device.yaml +++ b/recipes/configs/qwen2_5/1.5B_lora_single_device.yaml @@ -2,7 +2,7 @@ # using a Qwen2.5 1.5B model # # This config assumes that you've run the following command before launching: -# tune download Qwen/Qwen2.5-1.5B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-1.5B-Instruct # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config qwen2_5/1.5B_lora_single_device diff --git a/recipes/configs/qwen2_5/14B_lora_single_device.yaml b/recipes/configs/qwen2_5/14B_lora_single_device.yaml index 002129641a..2886a56664 100644 --- a/recipes/configs/qwen2_5/14B_lora_single_device.yaml +++ b/recipes/configs/qwen2_5/14B_lora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-14B-Instruct --output-dir /tmp/Qwen2_5-14B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-14B-Instruct --output-dir /tmp/Qwen2_5-14B-Instruct # # To launch on a 
single device, run the following command from root: # tune run lora_finetune_single_device --config qwen2_5/14B_lora_single_device diff --git a/recipes/configs/qwen2_5/32B_lora.yaml b/recipes/configs/qwen2_5/32B_lora.yaml index 28cda4f662..bed3868365 100644 --- a/recipes/configs/qwen2_5/32B_lora.yaml +++ b/recipes/configs/qwen2_5/32B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-32B-Instruct --output-dir /tmp/Qwen2_5-32B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-32B-Instruct --output-dir /tmp/Qwen2_5-32B-Instruct # # To launch on 8 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 8 lora_finetune_distributed --config qwen2_5/32B_lora diff --git a/recipes/configs/qwen2_5/3B_full.yaml b/recipes/configs/qwen2_5/3B_full.yaml index 3fb2d23df0..7267dd5efe 100644 --- a/recipes/configs/qwen2_5/3B_full.yaml +++ b/recipes/configs/qwen2_5/3B_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 full_finetune_distributed --config qwen2_5/3B_full diff --git a/recipes/configs/qwen2_5/3B_full_single_device.yaml b/recipes/configs/qwen2_5/3B_full_single_device.yaml index a5b028c659..ef8d283098 100644 --- a/recipes/configs/qwen2_5/3B_full_single_device.yaml +++ b/recipes/configs/qwen2_5/3B_full_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-3B-Instruct --output-dir 
/tmp/Qwen2_5-3B-Instruct # # The default config uses an optimizer from bitsandbytes. If you do not have it installed, # you can install it with diff --git a/recipes/configs/qwen2_5/3B_lora.yaml b/recipes/configs/qwen2_5/3B_lora.yaml index ffd3b6c494..6cde39b86e 100644 --- a/recipes/configs/qwen2_5/3B_lora.yaml +++ b/recipes/configs/qwen2_5/3B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2_5/3B_lora diff --git a/recipes/configs/qwen2_5/3B_lora_single_device.yaml b/recipes/configs/qwen2_5/3B_lora_single_device.yaml index b6c5be1a0a..bd3cb9fa68 100644 --- a/recipes/configs/qwen2_5/3B_lora_single_device.yaml +++ b/recipes/configs/qwen2_5/3B_lora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-3B-Instruct --output-dir /tmp/Qwen2_5-3B-Instruct # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config qwen2_5/3B_lora_single_device diff --git a/recipes/configs/qwen2_5/72B_lora.yaml b/recipes/configs/qwen2_5/72B_lora.yaml index 99019e6c43..fc7ad2dc7d 100644 --- a/recipes/configs/qwen2_5/72B_lora.yaml +++ b/recipes/configs/qwen2_5/72B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-72B-Instruct --output-dir /tmp/Qwen2_5-72B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-72B-Instruct --output-dir 
/tmp/Qwen2_5-72B-Instruct # # To launch on 8 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 8 lora_finetune_distributed --config qwen2_5/72B_lora diff --git a/recipes/configs/qwen2_5/7B_full.yaml b/recipes/configs/qwen2_5/7B_full.yaml index f6bab9f108..e1de8d5584 100644 --- a/recipes/configs/qwen2_5/7B_full.yaml +++ b/recipes/configs/qwen2_5/7B_full.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 full_finetune_distributed --config qwen2_5/7B_full diff --git a/recipes/configs/qwen2_5/7B_full_single_device.yaml b/recipes/configs/qwen2_5/7B_full_single_device.yaml index 0986591e53..3bc3428410 100644 --- a/recipes/configs/qwen2_5/7B_full_single_device.yaml +++ b/recipes/configs/qwen2_5/7B_full_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct # # The default config uses an optimizer from bitsandbytes. 
If you do not have it installed, # you can install it with diff --git a/recipes/configs/qwen2_5/7B_lora.yaml b/recipes/configs/qwen2_5/7B_lora.yaml index b59ac69bcd..460c67d26f 100644 --- a/recipes/configs/qwen2_5/7B_lora.yaml +++ b/recipes/configs/qwen2_5/7B_lora.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct # # To launch on 2 devices, run the following command from root: # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2_5/7B_lora diff --git a/recipes/configs/qwen2_5/7B_lora_single_device.yaml b/recipes/configs/qwen2_5/7B_lora_single_device.yaml index a030c2fba2..5c3353f7e9 100644 --- a/recipes/configs/qwen2_5/7B_lora_single_device.yaml +++ b/recipes/configs/qwen2_5/7B_lora_single_device.yaml @@ -3,7 +3,7 @@ # # This config assumes that you've run the following command before launching # this run: -# tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct --ignore-patterns None +# tune download Qwen/Qwen2.5-7B-Instruct --output-dir /tmp/Qwen2_5-7B-Instruct # # To launch on a single device, run the following command from root: # tune run lora_finetune_single_device --config qwen2_5/7B_lora_single_device diff --git a/tests/torchtune/_cli/test_download.py b/tests/torchtune/_cli/test_download.py index 3338adc870..f89b91a845 100644 --- a/tests/torchtune/_cli/test_download.py +++ b/tests/torchtune/_cli/test_download.py @@ -34,7 +34,7 @@ def snapshot_download(self, mocker, tmpdir): def test_download_calls_snapshot(self, capsys, monkeypatch, snapshot_download): model = "meta-llama/Llama-2-7b" - testargs = f"tune download {model}".split() + testargs = f"tune download {model} --ignore-patterns *.safetensors".split() monkeypatch.setattr(sys, "argv", testargs) # Call the first 
time and get GatedRepoError diff --git a/torchtune/_cli/download.py b/torchtune/_cli/download.py index a60cf370ab..55f5ef4ab1 100644 --- a/torchtune/_cli/download.py +++ b/torchtune/_cli/download.py @@ -108,9 +108,8 @@ def _add_arguments(self) -> None: "--ignore-patterns", type=str, required=False, - default="*.safetensors", - help="If provided, files matching any of the patterns are not downloaded. Defaults to ignoring " - "safetensors files to avoid downloading duplicate weights. Only supported for Hugging Face Hub models.", + help="If provided, files matching any of the patterns are not downloaded. Example: '*.safetensors'. " + "Only supported for Hugging Face Hub models.", ) self._parser.add_argument( "--source",