From c618f0fd6d216fd63d9bcd24736b80e5fe396058 Mon Sep 17 00:00:00 2001 From: Max Zuo Date: Wed, 13 Aug 2025 23:37:51 -0400 Subject: [PATCH 1/4] added support for seq2seq models --- unsloth/models/loader.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 7ac27158a..29d7369a2 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -479,6 +479,7 @@ def from_pretrained( from .vision import FastBaseModel from transformers import ( AutoModelForCausalLM, + AutoModelForSeq2SeqLM, ) try: from transformers import AutoModelForImageTextToText @@ -823,8 +824,12 @@ def from_pretrained( # Check if VLM is_vlm = any(x.endswith("ForConditionalGeneration") for x in model_config.architectures) is_vlm = is_vlm or hasattr(model_config, "vision_config") - if auto_model is None: - auto_model = AutoModelForVision2Seq if is_vlm else AutoModelForCausalLM + if AutoModelForSeq2SeqLM._model_mapping.get(type(model_config), None) is not None: + auto_model = AutoModelForSeq2SeqLM + elif is_vlm: + auto_model = AutoModelForVision2Seq + else: + auto_model = AutoModelForCausalLM model, tokenizer = FastBaseModel.from_pretrained( model_name = model_name, From 9ae79c1a070d6d3f6ac3fb1f04a921e954b394e8 Mon Sep 17 00:00:00 2001 From: Max Zuo Date: Wed, 13 Aug 2025 23:50:14 -0400 Subject: [PATCH 2/4] only trigger logic if auto_model is None (kept from original logic) --- unsloth/models/loader.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 29d7369a2..59fc1e0cf 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -824,12 +824,13 @@ def from_pretrained( # Check if VLM is_vlm = any(x.endswith("ForConditionalGeneration") for x in model_config.architectures) is_vlm = is_vlm or hasattr(model_config, "vision_config") - if AutoModelForSeq2SeqLM._model_mapping.get(type(model_config), None) is not None: - 
auto_model = AutoModelForSeq2SeqLM - elif is_vlm: - auto_model = AutoModelForVision2Seq - else: - auto_model = AutoModelForCausalLM + if auto_model is None: + if AutoModelForSeq2SeqLM._model_mapping.get(type(model_config), None) is not None: + auto_model = AutoModelForSeq2SeqLM + elif is_vlm: + auto_model = AutoModelForVision2Seq + else: + auto_model = AutoModelForCausalLM model, tokenizer = FastBaseModel.from_pretrained( model_name = model_name, From 77c4f7683f01f91b975fa3c3bff07f1a1e8f8f0f Mon Sep 17 00:00:00 2001 From: Max Zuo Date: Thu, 14 Aug 2025 11:05:14 -0400 Subject: [PATCH 3/4] added support for peft loading of Seq2Seq LM --- unsloth/models/llama.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index eafbd5a43..94b617275 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -72,7 +72,7 @@ from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING from transformers import set_seed as transformers_set_seed from peft import LoraConfig, TaskType, get_peft_model as _get_peft_model -from peft import PeftModelForCausalLM, PeftModelForSequenceClassification +from peft import PeftModelForCausalLM, PeftModelForSequenceClassification, PeftModelForSeq2SeqLM from ..save import patch_saving_functions import re, os, inspect, math, sys import types @@ -2292,7 +2292,7 @@ def get_peft_model( if r <= 0: raise TypeError(f"Unsloth: Rank of {str(r)} must be larger than 0.") - if isinstance(model, PeftModelForCausalLM) or isinstance(model, PeftModelForSequenceClassification): + if isinstance(model, (PeftModelForCausalLM, PeftModelForSequenceClassification, PeftModelForSeq2SeqLM)): # Check if exactly the same and then pass through!
assert(hasattr(model, "peft_config")) @@ -2557,8 +2557,14 @@ def get_peft_model( raise NotImplementedError("Unsloth: Currently fast inference does not work with using biases for LoRA.") pass - #d oes not get lora yet, so get name from model, not base model + # does not get lora yet, so get name from model, not base model + model_type = type(model) + if model_type in AutoModelForSeq2SeqLM._model_mapping.values(): + task_type = TaskType.SEQ_2_SEQ_LM + elif model_type in AutoModelForSequenceClassification._model_mapping.values(): + task_type = TaskType.SEQ_CLS + else: + task_type = TaskType.CAUSAL_LM arguments = dict( r = r, @@ -2566,7 +2572,7 @@ def get_peft_model( target_modules = final_modules, lora_dropout = lora_dropout, bias = bias, - task_type = TaskType.CAUSAL_LM if not is_classification else TaskType.SEQ_CLS, + task_type = task_type, layers_to_transform = layers_to_transform, init_lora_weights = init_lora_weights, loftq_config = loftq_config, @@ -2701,7 +2707,7 @@ def patch_peft_model( use_gradient_checkpointing = use_gradient_checkpointing, ) pass - if not isinstance(model, PeftModelForCausalLM) and not isinstance(model, PeftModelForSequenceClassification): + if not isinstance(model, (PeftModelForCausalLM, PeftModelForSequenceClassification, PeftModelForSeq2SeqLM)): raise TypeError( "Unsloth: Your model needs to call `.get_peft_model` first!"
) From 765dd7733691b2ceeabcf0ccf0111e161e0698ef Mon Sep 17 00:00:00 2001 From: Max Zuo Date: Thu, 14 Aug 2025 12:46:30 -0400 Subject: [PATCH 4/4] added missing import --- unsloth/models/llama.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index 94b617275..b72d8defa 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -68,7 +68,14 @@ LlamaFlashAttention2 = LlamaAttention pass -from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, BitsAndBytesConfig, AutoConfig +from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, + AutoModelForSequenceClassification, + AutoModelForSeq2SeqLM, + BitsAndBytesConfig, + AutoConfig, +) from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING from transformers import set_seed as transformers_set_seed from peft import LoraConfig, TaskType, get_peft_model as _get_peft_model