-
Notifications
You must be signed in to change notification settings - Fork 678
Fix Qlora/lora for 3.2 vision #2028
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -338,7 +338,6 @@ def lora_llama3_2_vision_encoder( | |
fusion_lora: bool, | ||
lora_attn_modules: List[LORA_ATTN_MODULES], | ||
apply_lora_to_mlp: bool = False, | ||
apply_lora_to_output: bool = False, | ||
*, | ||
# clip encoder parameters | ||
patch_size: int, | ||
|
@@ -377,8 +376,6 @@ def lora_llama3_2_vision_encoder( | |
``{"q_proj", "k_proj", "v_proj", "output_proj"}``. | ||
apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer. | ||
Default: False | ||
apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection. | ||
Default: False | ||
patch_size (int): The size of each patch. Used to divide the tiles into patches. | ||
E.g. for ``patch_size=40``, a tile of shape (400, 400) will have 10x10 grid of patches | ||
with shape (40, 40) each. | ||
|
@@ -412,7 +409,6 @@ def lora_llama3_2_vision_encoder( | |
lora_options = { | ||
"lora_modules": lora_attn_modules, | ||
"apply_lora_to_mlp": apply_lora_to_mlp, | ||
"apply_lora_to_output": apply_lora_to_output, | ||
"lora_rank": lora_rank, | ||
"lora_alpha": lora_alpha, | ||
"lora_dropout": lora_dropout, | ||
|
@@ -679,7 +675,6 @@ def lora_llama3_2_vision_projection_head( | |
num_hidden_inputs: int, | ||
# LoRA args | ||
apply_lora_to_mlp: bool, | ||
apply_lora_to_output: bool, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this should stay in the projection head; it just doesn't make sense in the CLIP model. |
||
lora_rank: int, | ||
lora_alpha: float, | ||
lora_dropout: float = 0.0, | ||
|
@@ -701,8 +696,6 @@ def lora_llama3_2_vision_projection_head( | |
num_hidden_inputs (int): number of hidden inputs to the projection head. | ||
apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer. | ||
Default: False | ||
apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection. | ||
Default: False | ||
lora_rank (int): rank of each low-rank approximation | ||
lora_alpha (float): scaling factor for the low-rank approximation | ||
lora_dropout (float): LoRA dropout probability. Default: 0.0 | ||
|
@@ -773,19 +766,7 @@ def lora_llama3_2_vision_projection_head( | |
# cross encoding | ||
# TODO: quantize_base is not applied to final output_proj currently. | ||
proj_in = clip_embed_dim * (num_hidden_inputs + 1) | ||
adapter_cls = DoRALinear if use_dora else LoRALinear | ||
output_proj = ( | ||
adapter_cls( | ||
proj_in, | ||
decoder_embed_dim, | ||
rank=lora_rank, | ||
alpha=lora_alpha, | ||
dropout=lora_dropout, | ||
use_bias=True, | ||
) | ||
if apply_lora_to_output | ||
else nn.Linear(proj_in, decoder_embed_dim) | ||
) | ||
output_proj = nn.Linear(proj_in, decoder_embed_dim) | ||
return Llama3VisionProjectionHead( | ||
layers=layers, | ||
output=output_proj, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,7 +65,7 @@ def __init__( | |
self.use_bias = use_bias | ||
self._quantize_base = quantize_base | ||
|
||
if not self._quantize_base and quantization_kwargs: | ||
if not self._quantize_base and any([v for v in quantization_kwargs.values()]): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. did There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good question! It outputs something like: {use_lora_on_output: None}. Even though the value is None, it has a key, which fails the check. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. sorry There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If quantize_base is false, then we should NOT have any quantization args, right? If no quantization is happening, then why have these args? So this assertion was failing, because quantize_base WAS false (not False --> True), but we have {use_lora_on_output: None}, which returns True. So if True and True: raise error. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree with Rafi here, there shouldn't be any kwargs passed in if quantize_base is False. {use_lora_on_output: None} should not be passed in when quantize_base = False. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
raise ValueError( | ||
f"``quantize_base`` is False, but received the following quantization arguments: {quantization_kwargs}" | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be removed from here and manually added to the projection head call