1 file changed: +0 −11 lines
@@ -4,7 +4,6 @@
 from llmcompressor.modeling import prepare_for_quantization
 from llmcompressor.modifiers.quantization import GPTQModifier
 from llmcompressor.transformers import oneshot
-from llmcompressor.utils import dispatch_for_generation
 
 # Select model and load it.
 # For DeepSeekv3, we require a full precision model in order to properly calibrate
@@ -72,16 +71,6 @@ def tokenize(sample):
     sequential_targets=["DeepseekV3Attention", "DeepseekV3MLP"],
 )
 
-# Confirm generations of the quantized model look sane.
-print("\n\n")
-print("========== SAMPLE GENERATION ==============")
-dispatch_for_generation(model)
-sample = tokenizer("Hello my name is", return_tensors="pt")
-sample = {key: value.to("cuda") for key, value in sample.items()}
-output = model.generate(**sample, max_new_tokens=100)
-print(tokenizer.decode(output[0]))
-print("==========================================\n\n")
-
 # Save to disk compressed.
 SAVE_DIR = model_id.rstrip("/").split("/")[-1] + "-W4A16-G128"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
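
The deleted block ran an in-script sanity generation before saving. If you still want that check, a minimal sketch is to reload the saved checkpoint and generate from it instead. Assumptions, not part of this diff: transformers with compressed-tensors support is installed, a GPU is available, and SAVE_DIR is the directory written by the script above.

# Minimal sketch, not part of this diff: reload the compressed checkpoint
# and confirm generations look sane. Assumes transformers + compressed-tensors
# are installed and SAVE_DIR is the directory produced by save_pretrained above.
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(SAVE_DIR, device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(SAVE_DIR)

sample = tokenizer("Hello my name is", return_tensors="pt").to(model.device)
output = model.generate(**sample, max_new_tokens=100)
print(tokenizer.decode(output[0]))

Unlike the removed in-script block, this checks the model after it has round-tripped through the compressed on-disk format, so it also exercises the serialization path.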