examples/quantization_w8a8_int8 (1 file changed: +2, -1)

@@ -70,9 +70,10 @@ def tokenize(sample):
 # NOTE: transformers 4.49.0 results in a generation error with gemma2.
 # Consider either downgrading your transformers version to a previous version
 # or use vLLM for sample generation.
+# Note: compile is disabled: https://github.com/huggingface/transformers/issues/38333
 print("========== SAMPLE GENERATION ==============")
 input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
-output = model.generate(input_ids, max_new_tokens=20)
+output = model.generate(input_ids, max_new_tokens=20, disable_compile=True)
 print(tokenizer.decode(output[0]))
 print("==========================================")
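For reference, a minimal standalone sketch of the patched sample-generation step. This is an illustration, not the full example script; the model ID below is an assumption, and the example may load a different gemma2 checkpoint.

# Minimal sketch of the patched sample generation (assumed model ID, for illustration only).
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "google/gemma-2-2b-it"  # assumption: any gemma2 checkpoint hitting the issue

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cuda", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

print("========== SAMPLE GENERATION ==============")
input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
# disable_compile=True skips torch.compile during generation, working around
# https://github.com/huggingface/transformers/issues/38333
output = model.generate(input_ids, max_new_tokens=20, disable_compile=True)
print(tokenizer.decode(output[0]))
print("==========================================")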