diff --git a/recipes/configs/llama3_2_vision/11B_qlora.yaml b/recipes/configs/llama3_2_vision/11B_qlora.yaml
index d18209adfe..c934e78008 100644
--- a/recipes/configs/llama3_2_vision/11B_qlora.yaml
+++ b/recipes/configs/llama3_2_vision/11B_qlora.yaml
@@ -87,7 +87,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-11B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Profiler (disabled)
 profiler:
diff --git a/recipes/configs/llama3_2_vision/11B_qlora_single_device.yaml b/recipes/configs/llama3_2_vision/11B_qlora_single_device.yaml
index 8261d8eeac..531f27a52f 100644
--- a/recipes/configs/llama3_2_vision/11B_qlora_single_device.yaml
+++ b/recipes/configs/llama3_2_vision/11B_qlora_single_device.yaml
@@ -87,7 +87,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-11B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Profiler (disabled)
 profiler:
diff --git a/recipes/configs/llama3_2_vision/90B_full.yaml b/recipes/configs/llama3_2_vision/90B_full.yaml
index 09a7a22769..2ef3c271eb 100644
--- a/recipes/configs/llama3_2_vision/90B_full.yaml
+++ b/recipes/configs/llama3_2_vision/90B_full.yaml
@@ -78,7 +78,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-90B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Profiler (disabled)
 profiler:
diff --git a/recipes/configs/llama3_2_vision/90B_lora.yaml b/recipes/configs/llama3_2_vision/90B_lora.yaml
index 14388cc4ea..970c7dab81 100644
--- a/recipes/configs/llama3_2_vision/90B_lora.yaml
+++ b/recipes/configs/llama3_2_vision/90B_lora.yaml
@@ -87,7 +87,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-90B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Profiler (disabled)
 profiler:
diff --git a/recipes/configs/llama3_2_vision/90B_qlora.yaml b/recipes/configs/llama3_2_vision/90B_qlora.yaml
index 30810e90b1..888093d574 100644
--- a/recipes/configs/llama3_2_vision/90B_qlora.yaml
+++ b/recipes/configs/llama3_2_vision/90B_qlora.yaml
@@ -86,7 +86,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-90B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Profiler (disabled)
 profiler:
diff --git a/torchtune/models/llama3_2_vision/_component_builders.py b/torchtune/models/llama3_2_vision/_component_builders.py
index 3de323d368..6db3631444 100644
--- a/torchtune/models/llama3_2_vision/_component_builders.py
+++ b/torchtune/models/llama3_2_vision/_component_builders.py
@@ -377,7 +377,7 @@ def lora_llama3_2_vision_encoder(
             ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
         apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
             Default: False
-        apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection.
+        apply_lora_to_output (bool): whether to apply LoRA to the model's decoder and encoder output projection.
             Default: False
         patch_size (int): The size of each patch. Used to divide the tiles into patches. E.g.
             for ``patch_size=40``, a tile of shape (400, 400) will have 10x10 grid of patches
@@ -412,7 +412,6 @@ def lora_llama3_2_vision_encoder(
     lora_options = {
         "lora_modules": lora_attn_modules,
         "apply_lora_to_mlp": apply_lora_to_mlp,
-        "apply_lora_to_output": apply_lora_to_output,
         "lora_rank": lora_rank,
         "lora_alpha": lora_alpha,
         "lora_dropout": lora_dropout,
@@ -450,7 +449,9 @@ def lora_llama3_2_vision_encoder(
     }
     if fusion_lora:
         projection_head = lora_llama3_2_vision_projection_head(
-            **projection_options, **lora_options
+            apply_lora_to_output=apply_lora_to_output,
+            **projection_options,
+            **lora_options,
         )
     else:
         projection_head = lora_llama3_2_vision_projection_head(**projection_options)
@@ -700,9 +701,7 @@ def lora_llama3_2_vision_projection_head(
         clip_embed_dim (int): embedding dimension for the CLIP encoder.
         num_hidden_inputs (int): number of hidden inputs to the projection head.
         apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
-            Default: False
         apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection.
-            Default: False
         lora_rank (int): rank of each low-rank approximation
         lora_alpha (float): scaling factor for the low-rank approximation
         lora_dropout (float): LoRA dropout probability. Default: 0.0
@@ -724,7 +723,7 @@ def lora_llama3_2_vision_projection_head(
         lora_modules=lora_modules,
         embed_dim=clip_embed_dim,
         num_heads=num_heads,
-        num_kv_heads=num_heads,
+        num_kv_heads=num_kv_heads,
         head_dim=head_dim,
         attn_dropout=0.0,
         lora_rank=lora_rank,
diff --git a/torchtune/modules/peft/dora.py b/torchtune/modules/peft/dora.py
index bc1e5eeb03..6f097da6d0 100644
--- a/torchtune/modules/peft/dora.py
+++ b/torchtune/modules/peft/dora.py
@@ -65,7 +65,7 @@ def __init__(
         self.use_bias = use_bias
         self._quantize_base = quantize_base
 
-        if not self._quantize_base and quantization_kwargs:
+        if not self._quantize_base and any([v for v in quantization_kwargs.values()]):
             raise ValueError(
                 f"``quantize_base`` is False, but received the following quantization arguments: {quantization_kwargs}"
             )
diff --git a/torchtune/modules/peft/lora.py b/torchtune/modules/peft/lora.py
index 138dd0c5ee..e03d854f1f 100644
--- a/torchtune/modules/peft/lora.py
+++ b/torchtune/modules/peft/lora.py
@@ -65,7 +65,7 @@ def __init__(
         self.use_bias = use_bias
         self._quantize_base = quantize_base
 
-        if not self._quantize_base and quantization_kwargs:
+        if not self._quantize_base and any([v for v in quantization_kwargs.values()]):
             raise ValueError(
                 f"``quantize_base`` is False, but received the following quantization arguments: {quantization_kwargs}"
             )
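
A minimal standalone sketch (not torchtune code; the kwarg names are illustrative) of what the changed guard in `LoRALinear`/`DoRALinear` changes in behavior: the old check raised whenever `quantization_kwargs` was a non-empty dict, even if every value was a falsy default such as `None`; the new `any([...])` check only raises when at least one quantization argument is actually set while `quantize_base=False`.

```python
def old_guard(quantize_base: bool, **quantization_kwargs) -> bool:
    # Old behavior: flag an error whenever the kwargs dict is merely non-empty.
    return not quantize_base and bool(quantization_kwargs)


def new_guard(quantize_base: bool, **quantization_kwargs) -> bool:
    # New behavior: flag an error only when at least one kwarg has a truthy value.
    return not quantize_base and any([v for v in quantization_kwargs.values()])


# Hypothetical kwargs forwarded by a builder with default (None) values.
defaults = {"block_size": None, "scaler_block_size": None}

assert old_guard(False, **defaults) is True   # old: spurious ValueError
assert new_guard(False, **defaults) is False  # new: accepted
assert new_guard(False, block_size=256) is True  # still rejected when explicitly set
```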