Commit ad506fa

skip generation
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 941deac commit ad506fa

1 file changed: 0 additions, 11 deletions

examples/quantizing_moe/deepseekv3_example.py

Lines changed: 0 additions & 11 deletions
@@ -4,7 +4,6 @@
 from llmcompressor.modeling import prepare_for_quantization
 from llmcompressor.modifiers.quantization import GPTQModifier
 from llmcompressor.transformers import oneshot
-from llmcompressor.utils import dispatch_for_generation
 
 # Select model and load it.
 # For DeepSeekv3, we require a full precision model in order to properly calibrate
@@ -72,16 +71,6 @@ def tokenize(sample):
     sequential_targets=["DeepseekV3Attention", "DeepseekV3MLP"],
 )
 
-# Confirm generations of the quantized model look sane.
-print("\n\n")
-print("========== SAMPLE GENERATION ==============")
-dispatch_for_generation(model)
-sample = tokenizer("Hello my name is", return_tensors="pt")
-sample = {key: value.to("cuda") for key, value in sample.items()}
-output = model.generate(**sample, max_new_tokens=100)
-print(tokenizer.decode(output[0]))
-print("==========================================\n\n")
-
 # Save to disk compressed.
 SAVE_DIR = model_id.rstrip("/").split("/")[-1] + "-W4A16-G128"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
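
If the sample-generation sanity check is wanted again later, the deleted block can be pasted back in after the oneshot call, together with its import. A minimal sketch assembled from the removed lines; it assumes the example's model and tokenizer objects are in scope and a CUDA device is available:

from llmcompressor.utils import dispatch_for_generation

# Confirm generations of the quantized model look sane.
print("\n\n")
print("========== SAMPLE GENERATION ==============")
dispatch_for_generation(model)  # place the model onto available devices before generating
sample = tokenizer("Hello my name is", return_tensors="pt")
sample = {key: value.to("cuda") for key, value in sample.items()}
output = model.generate(**sample, max_new_tokens=100)
print(tokenizer.decode(output[0]))
print("==========================================\n\n")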
