
Commit 3c4fc40

make lint
Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com>
1 parent f2e195a commit 3c4fc40

File tree

3 files changed: 31 additions & 19 deletions


vllm/v1/sample/logits_processor.py

Lines changed: 21 additions & 10 deletions
@@ -526,10 +526,20 @@ def is_argmax_invariant(self) -> bool:
     def update_state(self, batch_update: Optional[BatchUpdate]):
         if batch_update:
             for index, params, prompt_tok_ids, output_tok_ids in batch_update.added:
-                max_think_tokens = params.max_think_tokens if isinstance(params, SamplingParams) else None
+                max_think_tokens = (
+                    params.max_think_tokens
+                    if isinstance(params, SamplingParams)
+                    else None
+                )
                 if max_think_tokens is not None:
-                    last_start = self._find_last_token_index(prompt_tok_ids, self.think_start_token_id)
-                    last_end = self._find_last_token_index(prompt_tok_ids, self.think_end_token_id)
+                    last_start = self._find_last_token_index(
+                        prompt_tok_ids,
+                        self.think_start_token_id
+                    )
+                    last_end = self._find_last_token_index(
+                        prompt_tok_ids,
+                        self.think_end_token_id
+                    )
                     in_think = last_start > last_end
                     count = len(prompt_tok_ids) - (last_start + 1) if in_think else 0
 
@@ -542,13 +552,13 @@ def update_state(self, batch_update: Optional[BatchUpdate]):
                 }
 
             for index in batch_update.removed:
-                self._state.pop(index, None)
+                self._state.pop(index, {})
 
             for i1, i2, direction in batch_update.moved:
                 if direction == MoveDirectionality.SWAP:
                     self._state[i1], self._state[i2] = self._state[i2], self._state[i1]
                 else:
-                    self._state[i2] = self._state.pop(i1, None)
+                    self._state[i2] = self._state.pop(i1, {})
 
         # Update in_think and count for all active requests
         for state in self._state.values():
@@ -579,7 +589,8 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor:
             if not state:
                 continue
 
-            if state["in_think"] and state["count"] >= state["max_think_tokens"]:
+            if state["in_think"] and state["count"] >= state[
+                    "max_think_tokens"]:
                 mask[index] = True
 
         if mask.any():
@@ -589,8 +600,9 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor:
         return logits
 
 
-def init_builtin_logitsprocs(pin_memory_available: bool, max_num_reqs: int,
-                             device: torch.device, reasoning_config: ReasoningConfig) -> LogitsProcessorManager:
+def init_builtin_logitsprocs(
+        pin_memory_available: bool, max_num_reqs: int, device: torch.device,
+        reasoning_config: ReasoningConfig) -> LogitsProcessorManager:
     """Construct 'builtin' vLLM logitsprocs which the engine
     loads by default.
 
@@ -619,8 +631,7 @@ def init_builtin_logitsprocs(pin_memory_available: bool, max_num_reqs: int,
     )
     return LogitsProcessorManager(
         non_argmax_invariant=[
-            min_tokens_logitproc,
-            logit_bias_logitproc,
+            min_tokens_logitproc, logit_bias_logitproc,
             max_think_tokens_logitproc
         ],
         argmax_invariant=[min_p_logitproc],
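
Note: the hunks above show only part of the max-think-tokens logits processor. The sketch below is a hedged, standalone reconstruction of the bookkeeping they touch (per-request state keyed by batch index, with "in_think", "count" and "max_think_tokens"), not the vLLM implementation; in particular, what happens to the masked rows after mask.any() is an assumption (forcing the think-end token), since the diff cuts off before that point.

# Standalone sketch of the state tracking shown in the diff above.
# Assumption: once a request is inside a think block and has hit its budget,
# every logit except think_end_token_id is masked out; the real handling
# after `if mask.any():` is not visible in these hunks.
import torch


class MaxThinkTokensSketch:

    def __init__(self, think_start_token_id: int, think_end_token_id: int):
        self.think_start_token_id = think_start_token_id
        self.think_end_token_id = think_end_token_id
        self._state: dict[int, dict] = {}

    @staticmethod
    def _find_last_token_index(tok_ids: list[int], token_id: int) -> int:
        # Index of the last occurrence of token_id, or -1 if absent.
        for i in range(len(tok_ids) - 1, -1, -1):
            if tok_ids[i] == token_id:
                return i
        return -1

    def add_request(self, index: int, prompt_tok_ids: list[int],
                    max_think_tokens: int) -> None:
        last_start = self._find_last_token_index(prompt_tok_ids,
                                                 self.think_start_token_id)
        last_end = self._find_last_token_index(prompt_tok_ids,
                                               self.think_end_token_id)
        # The prompt ends inside a think block if the last start marker comes
        # after the last end marker; count tokens emitted since that marker.
        in_think = last_start > last_end
        count = len(prompt_tok_ids) - (last_start + 1) if in_think else 0
        self._state[index] = {
            "in_think": in_think,
            "count": count,
            "max_think_tokens": max_think_tokens,
        }

    def apply(self, logits: torch.Tensor) -> torch.Tensor:
        mask = torch.zeros(logits.shape[0], dtype=torch.bool)
        for index, state in self._state.items():
            if not state:
                continue
            if state["in_think"] and state["count"] >= state["max_think_tokens"]:
                mask[index] = True
        if mask.any():
            # Assumed behaviour: keep only the think-end token for capped rows.
            logits[mask] = float("-inf")
            logits[mask, self.think_end_token_id] = 0.0
        return logits

Running this on a small [batch, vocab] logits tensor, a row whose request has reached its budget keeps a finite score only at think_end_token_id, which is the effect the masking in apply() aims for.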

vllm/v1/worker/gpu_input_batch.py

Lines changed: 2 additions & 1 deletion
@@ -263,7 +263,8 @@ def _register_add_request(self, request: "CachedRequestState") -> int:
         params = (request.sampling_params
                   if request.sampling_params else request.pooling_params)
         self.batch_update_builder.added.append(
-            (req_index, params, request.prompt_token_ids, request.output_token_ids))
+            (req_index, params, request.prompt_token_ids,
+             request.output_token_ids))
         return req_index
 
     def add_request(

vllm/v1/worker/gpu_model_runner.py

Lines changed: 8 additions & 8 deletions
@@ -18,7 +18,7 @@
 from vllm.attention.backends.abstract import AttentionBackend
 from vllm.attention.layer import Attention
 from vllm.compilation.counter import compilation_counter
-from vllm.config import (CompilationLevel, VllmConfig,
+from vllm.config import (CompilationLevel, ReasoningConfig, VllmConfig,
                          get_layers_from_vllm_config, update_config)
 from vllm.distributed.eplb.eplb_state import EplbState
 from vllm.distributed.kv_transfer import (get_kv_transfer_group,
@@ -39,8 +39,10 @@
 from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
 from vllm.multimodal.utils import group_mm_inputs_by_modality
 from vllm.pooling_params import PoolingParams
+from vllm.reasoning import ReasoningParserManager
 from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
+from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
                         GiB_bytes, LazyLoader, async_tensor_h2d, cdiv,
                         check_use_alibi, get_dtype_size,
@@ -71,10 +73,6 @@
 from .utils import (gather_mm_placeholders, initialize_kv_cache_for_kv_sharing,
                     sanity_check_mm_encoder_outputs, scatter_mm_placeholders)
 
-from vllm.config import ReasoningConfig
-from vllm.reasoning import ReasoningParserManager
-from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
-
 if TYPE_CHECKING:
     import xgrammar as xgr
     import xgrammar.kernels.apply_token_bitmask_inplace_torch_compile as xgr_torch_compile  # noqa: E501
@@ -109,7 +107,8 @@ def __init__(
         self.prompt_adapter_config = vllm_config.prompt_adapter_config
         self.observability_config = vllm_config.observability_config
 
-        if self.vllm_config.decoding_config.reasoning_backend in ('deepseek_r1', 'qwen'):
+        if self.vllm_config.decoding_config.reasoning_backend in (
+                'deepseek_r1', 'qwen'):
             tokenizer = init_tokenizer_from_configs(
                 model_config=self.vllm_config.model_config,
                 scheduler_config=self.vllm_config.scheduler_config,
@@ -120,8 +119,9 @@ def __init__(
             reasoner_cls = ReasoningParserManager.get_reasoning_parser(
                 reasoning_backend)
             reasoning_parser = reasoner_cls(tokenizer=tokenizer)
-            self.vllm_config.reasoning_config = ReasoningConfig(think_start_token_id=reasoning_parser.think_start_token_id,
-                                                                think_end_token_id=reasoning_parser.think_end_token_id)
+            self.vllm_config.reasoning_config = ReasoningConfig(
+                think_start_token_id=reasoning_parser.think_start_token_id,
+                think_end_token_id=reasoning_parser.think_end_token_id)
 
         from vllm.model_executor.models.utils import set_cpu_offload_max_bytes
         set_cpu_offload_max_bytes(
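
Note: the __init__ hunks above populate vllm_config.reasoning_config from the selected reasoning parser. The snippet below is a minimal, vLLM-free sketch of that wiring under stated assumptions: ReasoningConfig is reduced to a stand-in dataclass (the real one lives in vllm.config), and the "<think>"/"</think>" markers and token ids are placeholders rather than the values a real tokenizer would resolve.

# Hedged sketch of the ReasoningConfig wiring performed in __init__ above.
# ReasoningConfig here is a stand-in dataclass, and the vocabulary below is a
# placeholder; the actual parser resolves marker token ids via its tokenizer.
from dataclasses import dataclass


@dataclass
class ReasoningConfig:
    think_start_token_id: int
    think_end_token_id: int


def build_reasoning_config(vocab: dict[str, int]) -> ReasoningConfig:
    # A plain dict stands in for the tokenizer lookup of the think markers.
    return ReasoningConfig(
        think_start_token_id=vocab["<think>"],
        think_end_token_id=vocab["</think>"],
    )


print(build_reasoning_config({"<think>": 100, "</think>": 101}))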
