2 files changed: +4 −2 lines changed

File 1:
@@ -87,11 +87,12 @@ def process_and_tokenize(example):
 # NOTE: transformers 4.49.0 results in a generation error with gemma2.
 # Consider either downgrading your transformers version to a previous version
 # or use vLLM for sample generation.
+# Note: compile is disabled: https://github.com/huggingface/transformers/issues/38333
 print("\n\n")
 dispatch_for_generation(model)
 print("========== SAMPLE GENERATION ==============")
 input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
-output = model.generate(input_ids, max_new_tokens=100)
+output = model.generate(input_ids, max_new_tokens=100, disable_compile=True)
 print(tokenizer.decode(output[0]))
 print("==========================================\n\n")
File 2:
@@ -29,10 +29,11 @@
 # NOTE: transformers 4.49.0 results in a generation error with gemma2.
 # Consider either downgrading your transformers version to a previous version
 # or use vLLM for sample generation.
+# Note: compile is disabled: https://github.com/huggingface/transformers/issues/38333
 print("========== SAMPLE GENERATION ==============")
 dispatch_for_generation(model)
 input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
-output = model.generate(input_ids, max_new_tokens=20)
+output = model.generate(input_ids, max_new_tokens=20, disable_compile=True)
 print(tokenizer.decode(output[0]))
 print("==========================================")
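For readers applying the same workaround outside this diff: the change is just an extra keyword argument to `generate()`, which tells transformers to skip its torch.compile-based decoding path. Below is a minimal, self-contained sketch of the pattern; the checkpoint name is a placeholder (not taken from this PR), and `device_map="auto"` stands in for the `dispatch_for_generation(model)` helper used in the changed files.

```python
# Minimal sketch of the workaround in this diff: pass disable_compile=True
# to generate() so transformers skips its torch.compile decoding path
# (see https://github.com/huggingface/transformers/issues/38333).
# The checkpoint below is a placeholder, not taken from this PR.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "google/gemma-2-2b"  # placeholder gemma2 checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to(model.device)
output = model.generate(input_ids, max_new_tokens=20, disable_compile=True)
print(tokenizer.decode(output[0]))
```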