@@ -51,7 +51,7 @@ outputs = llm.chat(
 for output in outputs:
     prompt = output.prompt
     generated_text = output.outputs.text
-    reasoning_text = output.outputs.resoning_content
+    reasoning_text = output.outputs.reasoning_content
 ```

 ### Text Completion Interface (LLM.generate)
@@ -89,7 +89,7 @@ from PIL import Image

 from fastdeploy.entrypoints.llm import LLM
 from fastdeploy.engine.sampling_params import SamplingParams
-from fastdeploy.input.ernie_tokenizer_v2 import ErnieBotTokenizer
+from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer

 PATH = "baidu/ERNIE-4.5-VL-28B-A3B-Paddle"
 tokenizer = ErnieBotTokenizer.from_pretrained(os.path.dirname(PATH))
@@ -125,7 +125,7 @@ for message in messages:
     })

 sampling_params = SamplingParams(temperature=0.1, max_tokens=6400)
-llm = LLM(model=PATH, tensor_parallel_size=8, max_model_len=32768, enable_mm=True, limit_mm_per_prompt={"image": 100}, reasoning_parser="ernie-45-vl")
+llm = LLM(model=PATH, tensor_parallel_size=1, max_model_len=32768, enable_mm=True, limit_mm_per_prompt={"image": 100}, reasoning_parser="ernie-45-vl")
 outputs = llm.generate(prompts={
     "prompt": prompt,
     "multimodal_data": {
@@ -138,7 +138,7 @@ outputs = llm.generate(prompts={
 for output in outputs:
     prompt = output.prompt
     generated_text = output.outputs.text
-    reasoning_text = output.outputs.resoning_content
+    reasoning_text = output.outputs.reasoning_content

 ```
 > Note: The `generate` interface does not currently support passing parameters to control the thinking function (on/off); it always uses the model's default behavior.
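
Since `generate` offers no thinking switch, a minimal sketch of toggling it through the chat interface instead is shown below. It assumes the `chat_template_kwargs={"enable_thinking": ...}` option from the chat examples also applies to this model; treat the parameter name as an assumption, not a confirmed API for this release.

```python
from fastdeploy.entrypoints.llm import LLM
from fastdeploy.engine.sampling_params import SamplingParams

# Sketch under an assumption: llm.chat() accepts chat_template_kwargs,
# and "enable_thinking" switches the thinking output on/off here.
llm = LLM(
    model="baidu/ERNIE-4.5-VL-28B-A3B-Paddle",
    tensor_parallel_size=1,
    max_model_len=32768,
    enable_mm=True,
    reasoning_parser="ernie-45-vl",
)
outputs = llm.chat(
    messages=[{"role": "user", "content": "Describe this image."}],
    sampling_params=SamplingParams(temperature=0.1, max_tokens=6400),
    chat_template_kwargs={"enable_thinking": False},  # assumed chat-only option
)
```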