Commit ed7ec19

Commit message: apply
1 parent 5fbb1b8, commit ed7ec19

77 files changed: 12,822 additions & 7,709 deletions

tests/saving/gpt-oss-merge/test_merged_model.py

Lines changed: 13 additions & 6 deletions
@@ -6,6 +6,7 @@
 import os
 import shutil
 
+
 def safe_remove_directory(path):
     try:
         if os.path.exists(path) and os.path.isdir(path):
@@ -17,6 +18,8 @@ def safe_remove_directory(path):
     except Exception as e:
         print(f"Failed to remove directory {path}: {e}")
         return False
+
+
 pass
 
 print("🔥 Loading the 16-bit merged model from disk...")
@@ -35,13 +38,15 @@ def safe_remove_directory(path):
 ]
 inputs = merged_tokenizer.apply_chat_template(
     messages,
-    add_generation_prompt = True,
-    return_tensors = "pt",
-    return_dict = True,
-    reasoning_effort = "low", # **NEW!** Set reasoning effort to low, medium or high
+    add_generation_prompt=True,
+    return_tensors="pt",
+    return_dict=True,
+    reasoning_effort="low",  # **NEW!** Set reasoning effort to low, medium or high
 ).to(merged_model.device)
 
-_ = merged_model.generate(**inputs, max_new_tokens = 512, streamer = TextStreamer(merged_tokenizer))
+_ = merged_model.generate(
+    **inputs, max_new_tokens=512, streamer=TextStreamer(merged_tokenizer)
+)
 print("\n✅ Inference complete.")
 
 # --- Final Cleanup ---
@@ -51,5 +56,7 @@ def safe_remove_directory(path):
 gc.collect()
 
 safe_remove_directory("./gpt-oss-finetuned-merged")
-safe_remove_directory("./unsloth_compiled_cache") # Clean up cache created by this process
+safe_remove_directory(
+    "./unsloth_compiled_cache"
+)  # Clean up cache created by this process
 print("✅ Final cleanup complete. Exiting inference script.")

tests/saving/gpt-oss-merge/train_and_merge.py

Lines changed: 41 additions & 8 deletions
@@ -7,6 +7,7 @@
 import os
 import shutil
 
+
 def safe_remove_directory(path):
     try:
         if os.path.exists(path) and os.path.isdir(path):
@@ -18,15 +19,25 @@ def safe_remove_directory(path):
     except Exception as e:
         print(f"Failed to remove directory {path}: {e}")
         return False
+
+
 pass
 
 # This tokenizer will be used by the mapping function
 tokenizer = None
+
+
 def formatting_prompts_func(examples):
     convos = examples["messages"]
-    texts = [tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False) for convo in convos]
+    texts = [
+        tokenizer.apply_chat_template(
+            convo, tokenize=False, add_generation_prompt=False
+        )
+        for convo in convos
+    ]
     return {"text": texts}
 
+
 # --- Load 4-bit Model and Train ---
 print("Loading 4-bit Mxfp4 gpt-oss model for training...")
 max_seq_length = 1024
@@ -39,15 +50,33 @@ def formatting_prompts_func(examples):
 )
 
 model = FastLanguageModel.get_peft_model(
-    model, r=8, target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
-    lora_alpha=16, use_gradient_checkpointing="unsloth", random_state=3407,
+    model,
+    r=8,
+    target_modules=[
+        "q_proj",
+        "k_proj",
+        "v_proj",
+        "o_proj",
+        "gate_proj",
+        "up_proj",
+        "down_proj",
+    ],
+    lora_alpha=16,
+    use_gradient_checkpointing="unsloth",
+    random_state=3407,
 )
 
 trainer = SFTTrainer(
-    model=model, tokenizer=tokenizer, train_dataset=dataset,
+    model=model,
+    tokenizer=tokenizer,
+    train_dataset=dataset,
     args=SFTConfig(
-        per_device_train_batch_size=1, gradient_accumulation_steps=4, max_steps=10,
-        learning_rate=2e-4, output_dir="outputs", report_to="none",
+        per_device_train_batch_size=1,
+        gradient_accumulation_steps=4,
+        max_steps=10,
+        learning_rate=2e-4,
+        output_dir="outputs",
+        report_to="none",
     ),
 )
 
@@ -57,7 +86,9 @@ def formatting_prompts_func(examples):
 
 # --- Merge and Save ---
 print("\n💾 Merging and saving the 16-bit model to './gpt-oss-finetuned-merged'...")
-model.save_pretrained_merged(save_directory="./gpt-oss-finetuned-merged", tokenizer=tokenizer)
+model.save_pretrained_merged(
+    save_directory="./gpt-oss-finetuned-merged", tokenizer=tokenizer
+)
 print("✅ Model merged and saved.")
 
 # --- Cleanup ---
@@ -67,5 +98,7 @@ def formatting_prompts_func(examples):
 gc.collect()
 
 safe_remove_directory("./outputs")
-safe_remove_directory("./unsloth_compiled_cache") # Clean up the cache created by this process
+safe_remove_directory(
+    "./unsloth_compiled_cache"
+)  # Clean up the cache created by this process
 print("✅ Cleanup complete. Exiting training script.")

tests/saving/language_models/test_merge_4bit_validation.py

Lines changed: 52 additions & 27 deletions
@@ -12,21 +12,28 @@
 
 from tests.utils.cleanup_utils import safe_remove_directory
 
+
 def formatting_prompts_func(examples):
     convos = examples["messages"]
-    texts = [tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False) for convo in convos]
+    texts = [
+        tokenizer.apply_chat_template(
+            convo, tokenize=False, add_generation_prompt=False
+        )
+        for convo in convos
+    ]
     return {"text": texts}
 
-print(f"\n{'='*80}")
+
+print(f"\n{'=' * 80}")
 print("🔍 PHASE 1: Loading Base Model and Initial Training")
-print(f"{'='*80}")
+print(f"{'=' * 80}")
 
 if torch.cuda.is_bf16_supported():
     compute_dtype = torch.bfloat16
-    attn_implementation = 'flash_attention_2'
+    attn_implementation = "flash_attention_2"
 else:
     compute_dtype = torch.float16
-    attn_implementation = 'sdpa'
+    attn_implementation = "sdpa"
 
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name="unsloth/Llama-3.1-8B-Instruct",
@@ -35,7 +42,7 @@ def formatting_prompts_func(examples):
     load_in_4bit=True,
     load_in_8bit=False,
     full_finetuning=False,
-    attn_implementation=attn_implementation
+    attn_implementation=attn_implementation,
 )
 
 tokenizer = get_chat_template(
@@ -44,19 +51,29 @@ def formatting_prompts_func(examples):
 )
 
 # Load small dataset for quick training
-dataset_train = load_dataset("allenai/openassistant-guanaco-reformatted", split="train[:100]")
+dataset_train = load_dataset(
+    "allenai/openassistant-guanaco-reformatted", split="train[:100]"
+)
 dataset_train = dataset_train.map(formatting_prompts_func, batched=True)
 
 print("✅ Base model loaded successfully!")
 
-print(f"\n{'='*80}")
+print(f"\n{'=' * 80}")
 print("🔍 PHASE 2: First Fine-tuning")
-print(f"{'='*80}")
+print(f"{'=' * 80}")
 
 model = FastLanguageModel.get_peft_model(
     model,
     r=16,
-    target_modules=['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"],
+    target_modules=[
+        "k_proj",
+        "q_proj",
+        "v_proj",
+        "o_proj",
+        "gate_proj",
+        "down_proj",
+        "up_proj",
+    ],
     lora_alpha=16,
     lora_dropout=0,
     bias="none",
@@ -97,21 +114,21 @@ def formatting_prompts_func(examples):
 trainer_stats = trainer.train()
 print("✅ First fine-tuning completed!")
 
-print(f"\n{'='*80}")
+print(f"\n{'=' * 80}")
 print("🔍 PHASE 3: Save with Forced 4bit Merge")
-print(f"{'='*80}")
+print(f"{'=' * 80}")
 
 model.save_pretrained_merged(
-    save_directory='./test_4bit_model',
+    save_directory="./test_4bit_model",
     tokenizer=tokenizer,
-    save_method="forced_merged_4bit"
+    save_method="forced_merged_4bit",
 )
 
 print("✅ Model saved with forced 4bit merge!")
 
-print(f"\n{'='*80}")
+print(f"\n{'=' * 80}")
 print("🔍 PHASE 4: Loading 4bit Model and Second Fine-tuning")
-print(f"{'='*80}")
+print(f"{'=' * 80}")
 
 # Clean up first model
 del model
@@ -137,7 +154,15 @@ def formatting_prompts_func(examples):
 model_4bit = FastLanguageModel.get_peft_model(
     model_4bit,
     r=16,
-    target_modules=['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"],
+    target_modules=[
+        "k_proj",
+        "q_proj",
+        "v_proj",
+        "o_proj",
+        "gate_proj",
+        "down_proj",
+        "up_proj",
+    ],
     lora_alpha=16,
     lora_dropout=0,
     bias="none",
@@ -177,14 +202,14 @@ def formatting_prompts_func(examples):
 trainer_4bit.train()
 print("✅ Second fine-tuning on 4bit model completed!")
 
-print(f"\n{'='*80}")
+print(f"\n{'=' * 80}")
 print("🔍 PHASE 5: Testing TypeError on Regular Merge (Should Fail)")
-print(f"{'='*80}")
+print(f"{'=' * 80}")
 
 try:
     model_4bit.save_pretrained_merged(
-        save_directory='./test_should_fail',
-        tokenizer=tokenizer_4bit
+        save_directory="./test_should_fail",
+        tokenizer=tokenizer_4bit,
         # No save_method specified, should default to regular merge
     )
     assert False, "Expected TypeError but merge succeeded!"
@@ -194,23 +219,23 @@ def formatting_prompts_func(examples):
 print("✅ Correct TypeError raised for 4bit base model regular merge attempt!")
 print(f"Error message: {str(e)}")
 
-print(f"\n{'='*80}")
+print(f"\n{'=' * 80}")
 print("🔍 PHASE 6: Successful Save with Forced 4bit Method")
-print(f"{'='*80}")
+print(f"{'=' * 80}")
 
 try:
     model_4bit.save_pretrained_merged(
-        save_directory='./test_4bit_second',
+        save_directory="./test_4bit_second",
         tokenizer=tokenizer_4bit,
-        save_method="forced_merged_4bit"
+        save_method="forced_merged_4bit",
     )
     print("✅ Successfully saved 4bit model with forced 4bit method!")
 except Exception as e:
     assert False, f"Phase 6 failed unexpectedly: {e}"
 
-print(f"\n{'='*80}")
+print(f"\n{'=' * 80}")
 print("🔍 CLEANUP")
-print(f"{'='*80}")
+print(f"{'=' * 80}")
 
 # Cleanup
 safe_remove_directory("./outputs")
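
Phases 5 and 6 pin down the save contract: once the base weights are 4-bit, a default merge must raise TypeError, while an explicit save_method="forced_merged_4bit" must succeed. The same contract as a pytest-style sketch, where model_4bit and tokenizer_4bit are assumed fixtures built the way the script builds them:

import pytest

def test_4bit_merge_contract(model_4bit, tokenizer_4bit):
    # Default merge on a 4-bit base is refused; the script expects a TypeError here.
    with pytest.raises(TypeError):
        model_4bit.save_pretrained_merged(
            save_directory="./test_should_fail",
            tokenizer=tokenizer_4bit,
        )
    # Explicitly opting into a 4-bit merge is the supported path.
    model_4bit.save_pretrained_merged(
        save_directory="./test_4bit_second",
        tokenizer=tokenizer_4bit,
        save_method="forced_merged_4bit",
    )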
