2 changes: 1 addition & 1 deletion recipes/configs/llama3_2_vision/11B_qlora.yaml
@@ -87,7 +87,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-11B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True

 # Profiler (disabled)
 profiler:
@@ -87,7 +87,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-11B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True

 # Profiler (disabled)
 profiler:
2 changes: 1 addition & 1 deletion recipes/configs/llama3_2_vision/90B_full.yaml
@@ -78,7 +78,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-90B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True

 # Profiler (disabled)
 profiler:
2 changes: 1 addition & 1 deletion recipes/configs/llama3_2_vision/90B_lora.yaml
@@ -87,7 +87,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-90B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True

 # Profiler (disabled)
 profiler:
2 changes: 1 addition & 1 deletion recipes/configs/llama3_2_vision/90B_qlora.yaml
@@ -86,7 +86,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/Llama-3.2-90B-Vision-Instruct/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True

 # Profiler (disabled)
 profiler:
11 changes: 5 additions & 6 deletions torchtune/models/llama3_2_vision/_component_builders.py
@@ -377,7 +377,7 @@ def lora_llama3_2_vision_encoder(
             ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
         apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
             Default: False
-        apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection.
+        apply_lora_to_output (bool): whether to apply LoRA to the model's decoder and encoder output projection.
             Default: False
         patch_size (int): The size of each patch. Used to divide the tiles into patches.
             E.g. for ``patch_size=40``, a tile of shape (400, 400) will have 10x10 grid of patches
@@ -412,7 +412,6 @@ def lora_llama3_2_vision_encoder(
     lora_options = {
         "lora_modules": lora_attn_modules,
         "apply_lora_to_mlp": apply_lora_to_mlp,
-        "apply_lora_to_output": apply_lora_to_output,
Contributor: This should be removed from here and manually added to the projection head call.
"lora_rank": lora_rank,
"lora_alpha": lora_alpha,
"lora_dropout": lora_dropout,
Expand Down Expand Up @@ -450,7 +449,9 @@ def lora_llama3_2_vision_encoder(
     }
     if fusion_lora:
         projection_head = lora_llama3_2_vision_projection_head(
-            **projection_options, **lora_options
+            apply_lora_to_output=apply_lora_to_output,
+            **projection_options,
+            **lora_options,
         )
     else:
         projection_head = lora_llama3_2_vision_projection_head(**projection_options)
@@ -700,9 +701,7 @@ def lora_llama3_2_vision_projection_head(
         clip_embed_dim (int): embedding dimension for the CLIP encoder.
         num_hidden_inputs (int): number of hidden inputs to the projection head.
         apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
-            Default: False
         apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection.
-            Default: False
         lora_rank (int): rank of each low-rank approximation
         lora_alpha (float): scaling factor for the low-rank approximation
         lora_dropout (float): LoRA dropout probability. Default: 0.0
@@ -724,7 +723,7 @@ def lora_llama3_2_vision_projection_head(
         lora_modules=lora_modules,
         embed_dim=clip_embed_dim,
         num_heads=num_heads,
-        num_kv_heads=num_heads,
+        num_kv_heads=num_kv_heads,
         head_dim=head_dim,
         attn_dropout=0.0,
         lora_rank=lora_rank,
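A quick illustration of the reviewer's point above: once ``apply_lora_to_output`` is passed to the projection head explicitly, it has to be dropped from the shared options dict, otherwise the call would supply the same keyword twice. This is a minimal, self-contained sketch with a toy ``projection_head`` function standing in for the real builder:

# Toy stand-in for the real projection head builder, just to show the keyword collision.
def projection_head(apply_lora_to_output: bool = False, **kwargs) -> None:
    print(f"apply_lora_to_output={apply_lora_to_output}, other options={kwargs}")

lora_options = {
    "lora_rank": 8,
    "lora_alpha": 16.0,
    "apply_lora_to_output": True,  # still in the shared dict, as before this diff
}

try:
    # Explicit keyword plus the same key via **lora_options:
    projection_head(apply_lora_to_output=True, **lora_options)
except TypeError as err:
    # TypeError: projection_head() got multiple values for keyword argument 'apply_lora_to_output'
    print(err)

# With the key removed from the shared dict (as this diff does), the call is unambiguous.
lora_options.pop("apply_lora_to_output")
projection_head(apply_lora_to_output=True, **lora_options)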
2 changes: 1 addition & 1 deletion torchtune/modules/peft/dora.py
@@ -65,7 +65,7 @@ def __init__(
         self.use_bias = use_bias
         self._quantize_base = quantize_base

-        if not self._quantize_base and quantization_kwargs:
+        if not self._quantize_base and any([v for v in quantization_kwargs.values()]):
             raise ValueError(
                 f"``quantize_base`` is False, but received the following quantization arguments: {quantization_kwargs}"
             )
2 changes: 1 addition & 1 deletion torchtune/modules/peft/lora.py
@@ -65,7 +65,7 @@ def __init__(
         self.use_bias = use_bias
         self._quantize_base = quantize_base

-        if not self._quantize_base and quantization_kwargs:
+        if not self._quantize_base and any([v for v in quantization_kwargs.values()]):
Collaborator: Did ``and quantization_kwargs`` not work for a dictionary? You could also just check the length.

felipemello1 (Contributor Author), Nov 20, 2024: Good question! It outputs something like ``{use_lora_on_output: None}``. Even though the value is None, the dict has a key, which fails the check.

Collaborator:
In [1]: any([None])
Out[1]: False

Collaborator: Sorry, should ``{use_lora_on_output: None}`` fail the check?

felipemello1 (Contributor Author), Nov 20, 2024: If ``quantize_base`` is False, then we should NOT have any quantization args, right? If no quantization is happening, why have these args? So this assertion was failing: ``quantize_base`` WAS False (so ``not self._quantize_base`` is True), and ``{use_lora_on_output: None}`` is a non-empty dict, which also evaluates to True. With ``True and True``, the error was raised.

Contributor: I agree with Rafi here, there shouldn't be any kwargs passed in if ``quantize_base`` is False. ``{use_lora_on_output: None}`` should not be passed in when ``quantize_base = False``.

Contributor Author: [image attachment]
             raise ValueError(
                 f"``quantize_base`` is False, but received the following quantization arguments: {quantization_kwargs}"
             )
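For reference alongside the thread above, a small plain-Python illustration (not torchtune code) of why the old truthiness check fired on a dict whose only value is None, while the new ``any(...)`` check does not; a bare length check would behave like the old one:

quantization_kwargs = {"use_lora_on_output": None}

bool(quantization_kwargs)          # True: a dict with any key is truthy, so the old
                                   # ``and quantization_kwargs`` check raised the ValueError
len(quantization_kwargs) > 0       # True: the suggested length check behaves the same way
any(quantization_kwargs.values())  # False: None values are ignored, so no error is raised
any([v for v in quantization_kwargs.values()])  # False: the equivalent form used in this diff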