
Commit 75f5150

update code
1 parent 423c024 commit 75f5150

File tree: 6 files changed, +29 −30 lines changed

docs/features/structured_outputs.md

Lines changed: 5 additions & 6 deletions

````diff
@@ -348,7 +348,7 @@ The following example demonstrates how to use offline inference to generate a st
 
 ```python
 from fastdeploy import LLM, SamplingParams
-from fastdeploy.sampling_params import GuidedDecodingParams
+from fastdeploy.engine.sampling_params import GuidedDecodingParams
 from pydantic import BaseModel
 from enum import Enum
 
@@ -375,21 +375,20 @@ sampling_params = SamplingParams(
 )
 
 # Load model
-llm = LLM(model="ERNIE-4.5-0.3B", tensor_parallel_size=1, max_model_len=8192)
+llm = LLM(model="ERNIE-4.5-0.3B", tensor_parallel_size=1, max_model_len=8192, guided_decoding_backend="auto")
 
 outputs = llm.generate(
-    prompts="Classify this sentiment: vLLM is wonderful!",
+    prompts="Generate a JSON describing a literary work, including author, title and book type.",
     sampling_params=sampling_params,
 )
 
 # Output results
 for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs.text
+    print(output.outputs.text)
 ```
 
 Output:
 
 ```
-{"author": "Cao Xueqin", "title": "Dream of the Red Chamber", "genre": "Historical"}
+{"author": "George Orwell", "title": "1984", "genre": "Dystopian"}
 ```
````
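For context, here is roughly how the full offline example reads after this commit. This is a reconstruction, not the documentation's verbatim text: the `BookDescription` fields other than `title` and `genre`, and the `BookType` enum members, are assumptions, since only fragments of the class appear in the diff context.

```python
from enum import Enum

from pydantic import BaseModel

from fastdeploy import LLM, SamplingParams
from fastdeploy.engine.sampling_params import GuidedDecodingParams


class BookType(str, Enum):
    # Hypothetical members; the actual docs may list different genres.
    fiction = "Fiction"
    dystopian = "Dystopian"
    historical = "Historical"


class BookDescription(BaseModel):
    author: str  # assumed from the sample output, which includes "author"
    title: str
    genre: BookType


# Constrain generation to the JSON schema derived from BookDescription.
guided_decoding_params = GuidedDecodingParams(json=BookDescription.model_json_schema())

sampling_params = SamplingParams(
    top_p=0.95,
    max_tokens=6400,
    guided_decoding=guided_decoding_params,
)

# guided_decoding_backend="auto" lets FastDeploy pick the backend.
llm = LLM(model="ERNIE-4.5-0.3B", tensor_parallel_size=1,
          max_model_len=8192, guided_decoding_backend="auto")

outputs = llm.generate(
    prompts="Generate a JSON describing a literary work, including author, title and book type.",
    sampling_params=sampling_params,
)

for output in outputs:
    print(output.outputs.text)
```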

docs/zh/features/structured_outputs.md

Lines changed: 8 additions & 9 deletions

````diff
@@ -349,7 +349,7 @@ structural_tag: Optional[str] = None
 ```python
 
 from fastdeploy import LLM, SamplingParams
-from fastdeploy.sampling_params import GuidedDecodingParams
+from fastdeploy.engine.sampling_params import GuidedDecodingParams
 from pydantic import BaseModel
 from enum import Enum
 
@@ -365,28 +365,27 @@ class BookDescription(BaseModel):
     title: str
     genre: BookType
 
-# 受限解码参数
+# Constrained decoding parameters
 guided_decoding_params = GuidedDecodingParams(json=BookDescription.model_json_schema())
 
-# 采样参数
+# Sampling parameters
 sampling_params = SamplingParams(
     top_p=0.95,
     max_tokens=6400,
     guided_decoding=guided_decoding_params,
 )
 
-# 加载模型
-llm = LLM(model="ERNIE-4.5-0.3B", tensor_parallel_size=1, max_model_len=8192)
+# Load model
+llm = LLM(model="ERNIE-4.5-0.3B", tensor_parallel_size=1, max_model_len=8192, guided_decoding_backend="auto")
 
 outputs = llm.generate(
-    prompts="Classify this sentiment: vLLM is wonderful!",
+    prompts="生成一个JSON,描述一本中国的著作,要包含作者、标题和书籍类型。",
     sampling_params=sampling_params,
 )
 
-# 输出结果
+# Output results
 for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs.text
+    print(output.outputs.text)
 
 ```
 
````

fastdeploy/engine/sampling_params.py

Lines changed: 1 addition & 4 deletions

```diff
@@ -196,9 +196,6 @@ def _verify_args(self) -> None:
             raise ValueError("seed must be in [0, 922337203685477580], got "
                              f"{self.seed}.")
 
-        if self.guided_decoding is not None:
-            self.guided_decoding._verify_args()
-
     def update_from_tokenizer(self, tokenizer):
         """
         # TODO: Implement stop tokens and bad words support
@@ -245,7 +242,7 @@ def to_dict(self):
                 guided_dict[key] = value
         return guided_dict
 
-    def _verify_args(self):
+    def __post_init__(self):
         """Verify the arguments."""
         guided_count = sum([
             self.json is not None, self.regex is not None, self.choice
```
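A note on the `_verify_args` → `__post_init__` rename: `GuidedDecodingParams` appears to be a dataclass, and a dataclass's `__post_init__` runs automatically at construction time, which is why the explicit `self.guided_decoding._verify_args()` call in `SamplingParams._verify_args` could be deleted. A minimal sketch of the pattern, with generic stand-in fields rather than FastDeploy's real ones:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class GuidedParams:
    # Stand-ins for the real json/regex/choice/grammar options.
    json_schema: Optional[dict] = None
    regex: Optional[str] = None

    def __post_init__(self):
        # Runs right after __init__, so callers get validation for free
        # instead of having to remember a separate _verify_args() call.
        if sum(v is not None for v in (self.json_schema, self.regex)) > 1:
            raise ValueError("only one guided decoding format may be set")


GuidedParams(json_schema={"type": "object"})   # fine
# GuidedParams(json_schema={}, regex=r"\d+")   # would raise at construction
```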

fastdeploy/input/ernie_processor.py

Lines changed: 4 additions & 5 deletions

```diff
@@ -20,10 +20,9 @@
 from paddleformers.generation import GenerationConfig
 
 from fastdeploy import envs
-from fastdeploy.utils import data_processor_logger
 from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer
-
 from fastdeploy.input.text_processor import BaseDataProcessor
+from fastdeploy.utils import data_processor_logger
 
 _SAMPLING_EPS = 1e-5
 
@@ -101,7 +100,7 @@ def process_request(self, request, max_model_len=None, **kwargs):
 
         if request.prompt_token_ids is None or len(
                 request.prompt_token_ids) == 0:
-            system = request.get("system")
+            # system = request.get("system")
             if request.prompt is None and request.messages is None:
                 raise ValueError(
                     f"The request should have `input_ids`, `text` or `messages`: {request}.")
@@ -150,7 +149,7 @@ def process_request_dict(self, request, max_model_len=None):
             request['stop_token_ids'] = stop_seqs
             request['stop_seqs_len'] = stop_seqs_len
 
-        system = request.get("system")
+        # system = request.get("system")
         # 处理prompt_token_ids
         if not request.get('prompt_token_ids'):
             if request.get('prompt') is None and request.get(
@@ -214,7 +213,7 @@ def process_response(self, response_dict, **kwargs):
             response_dict.outputs.reasoning_content = reasoning_content
         else:
             response_dict.outputs.text = full_text
-        data_processor_logger.info(f"req_id:{req_id}, token)ids: {token_ids}")
+        data_processor_logger.info(f"req_id:{req_id}, token ids: {token_ids}")
         if response_dict.outputs.text == "" and \
            response_dict.outputs.reasoning_content == "" and \
            response_dict.outputs.tool_call_content == []:
```

fastdeploy/model_executor/layers/sample/sampler.py

Lines changed: 2 additions & 3 deletions

```diff
@@ -124,15 +124,14 @@ def apply_token_mask(self,
 
         indices = []
         for idx, processor in self.logits_processor.items():
-            if processor is None:
+            if processor is None or idx in skip_idx_list:
                 continue
             if not processor.enable_reasoning or processor.reasoning_ended:
                 indices.append(idx)
 
-        mask_idx = [i for i in indices if i not in skip_idx_list]
         return available_processors.apply_token_mask(logits,
                                                      self.token_bitmask,
-                                                     indices=mask_idx)
+                                                     indices=indices)
 
     def _accept_token(self, idx: int, token: int):
         """ accept token """
```

fastdeploy/worker/worker_process.py

Lines changed: 9 additions & 3 deletions

```diff
@@ -579,10 +579,16 @@ def initialize_fd_config(config_or_args) -> FDConfig:
     Returns:
         FDConfig: Initialized FastDeploy configuration object
     """
-    # Get model config from model directory
-    model_config_dict, _ = ModelConfig.get_config_dict(config_or_args.model_name_or_path)
 
+    def getattr_without_none(obj, attr_name, default=None):
+        if hasattr(obj, attr_name):
+            if getattr(obj, attr_name) == "None":
+                return default
+            return getattr(obj, attr_name)
+        return default
 
+    # Get model config from model directory
+    model_config_dict, _ = ModelConfig.get_config_dict(config_or_args.model_name_or_path)
 
     # Handle MoE related configs
     if 'num_experts' in model_config_dict:
@@ -624,7 +630,7 @@ def initialize_fd_config(config_or_args) -> FDConfig:
 
     # Handle quantization (check for attribute existence)
     model_config.quantization = getattr(config_or_args, 'quantization', None)
-    model_config.reasoning_parser = getattr(config_or_args, 'reasoning_parser', None)
+    model_config.reasoning_parser = getattr_without_none(config_or_args, 'reasoning_parser', None)
 
     # Update speculative config_or_args
     speculative_config.method = getattr(config_or_args, 'speculative_method', None)
```
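The new `getattr_without_none` helper appears to exist because attribute values can arrive as the literal string `"None"` rather than Python's `None` (as can happen when values round-trip through command-line parsing); the helper maps that back to the default. A quick usage illustration with a stand-in for `config_or_args`:

```python
from types import SimpleNamespace


def getattr_without_none(obj, attr_name, default=None):
    # Treat the string "None" as if the attribute were unset.
    if hasattr(obj, attr_name):
        if getattr(obj, attr_name) == "None":
            return default
        return getattr(obj, attr_name)
    return default


# Stand-in args object; "None" mimics a stringified CLI value.
args = SimpleNamespace(reasoning_parser="None", quantization="wint8")

assert getattr_without_none(args, "reasoning_parser") is None
assert getattr_without_none(args, "quantization") == "wint8"
assert getattr_without_none(args, "missing", default="fb") == "fb"
```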
