
Commit 4d64881 (parent: 366cc0c)

make lint

Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com>

File tree

3 files changed: +31 −19 lines changed

vllm/v1/sample/logits_processor.py

Lines changed: 21 additions & 10 deletions
@@ -518,10 +518,20 @@ def is_argmax_invariant(self) -> bool:
     def update_state(self, batch_update: Optional[BatchUpdate]):
         if batch_update:
             for index, params, prompt_tok_ids, output_tok_ids in batch_update.added:
-                max_think_tokens = params.max_think_tokens if isinstance(params, SamplingParams) else None
+                max_think_tokens = (
+                    params.max_think_tokens
+                    if isinstance(params, SamplingParams)
+                    else None
+                )
                 if max_think_tokens is not None:
-                    last_start = self._find_last_token_index(prompt_tok_ids, self.think_start_token_id)
-                    last_end = self._find_last_token_index(prompt_tok_ids, self.think_end_token_id)
+                    last_start = self._find_last_token_index(
+                        prompt_tok_ids,
+                        self.think_start_token_id
+                    )
+                    last_end = self._find_last_token_index(
+                        prompt_tok_ids,
+                        self.think_end_token_id
+                    )
                     in_think = last_start > last_end
                     count = len(prompt_tok_ids) - (last_start + 1) if in_think else 0

@@ -534,13 +544,13 @@ def update_state(self, batch_update: Optional[BatchUpdate]):
                     }

             for index in batch_update.removed:
-                self._state.pop(index, None)
+                self._state.pop(index, {})

             for i1, i2, direction in batch_update.moved:
                 if direction == MoveDirectionality.SWAP:
                     self._state[i1], self._state[i2] = self._state[i2], self._state[i1]
                 else:
-                    self._state[i2] = self._state.pop(i1, None)
+                    self._state[i2] = self._state.pop(i1, {})

         # Update in_think and count for all active requests
         for state in self._state.values():
@@ -571,7 +581,8 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor:
             if not state:
                 continue

-            if state["in_think"] and state["count"] >= state["max_think_tokens"]:
+            if state["in_think"] and state["count"] >= state[
+                    "max_think_tokens"]:
                 mask[index] = True

         if mask.any():
@@ -581,8 +592,9 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor:
         return logits


-def init_builtin_logitsprocs(pin_memory_available: bool, max_num_reqs: int,
-                             device: torch.device, reasoning_config: ReasoningConfig) -> LogitsProcessorManager:
+def init_builtin_logitsprocs(
+        pin_memory_available: bool, max_num_reqs: int, device: torch.device,
+        reasoning_config: ReasoningConfig) -> LogitsProcessorManager:
     """Construct 'builtin' vLLM logitsprocs which the engine
     loads by default.

@@ -611,8 +623,7 @@ def init_builtin_logitsprocs(pin_memory_available: bool, max_num_reqs: int,
     )
     return LogitsProcessorManager(
         non_argmax_invariant=[
-            min_tokens_logitproc,
-            logit_bias_logitproc,
+            min_tokens_logitproc, logit_bias_logitproc,
             max_think_tokens_logitproc
         ],
         argmax_invariant=[min_p_logitproc],
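
For readers skimming the diff, here is a minimal standalone sketch (not the vLLM code itself) of what the max-think-tokens processor above does for a single request: locate the most recent think-start/think-end markers, count how many tokens have been produced inside an open think span, and constrain the logits once the budget is spent. Forcing the think-end token by masking everything else is an illustrative assumption here, as is every name in the sketch.

# Standalone sketch under the assumptions stated above; not the vLLM implementation.
import torch


def find_last_token_index(tokens: list[int], token_id: int) -> int:
    """Return the index of the last occurrence of token_id, or -1 if absent."""
    for i in range(len(tokens) - 1, -1, -1):
        if tokens[i] == token_id:
            return i
    return -1


def cap_think_tokens(token_ids: list[int], logits: torch.Tensor,
                     think_start_id: int, think_end_id: int,
                     max_think_tokens: int) -> torch.Tensor:
    last_start = find_last_token_index(token_ids, think_start_id)
    last_end = find_last_token_index(token_ids, think_end_id)
    in_think = last_start > last_end
    count = len(token_ids) - (last_start + 1) if in_think else 0
    if in_think and count >= max_think_tokens:
        # Budget exhausted: allow only the think-end token at the next step
        # (assumed forcing strategy, for illustration).
        forced = torch.full_like(logits, float("-inf"))
        forced[think_end_id] = logits[think_end_id]
        return forced
    return logits


# Example: think span opened at index 2, three tokens generated since then,
# budget of 3 -> only the think-end token (id 8) remains available.
logits = torch.zeros(16)
capped = cap_think_tokens([5, 6, 7, 9, 10, 11], logits,
                          think_start_id=7, think_end_id=8,
                          max_think_tokens=3)
assert torch.isinf(capped[0]) and capped[8] == 0.0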

vllm/v1/worker/gpu_input_batch.py

Lines changed: 2 additions & 1 deletion
@@ -263,7 +263,8 @@ def _register_add_request(self, request: "CachedRequestState") -> int:
         params = (request.sampling_params
                   if request.sampling_params else request.pooling_params)
         self.batch_update_builder.added.append(
-            (req_index, params, request.prompt_token_ids, request.output_token_ids))
+            (req_index, params, request.prompt_token_ids,
+             request.output_token_ids))
         return req_index

     def add_request(
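
As a quick illustration of the entry being wrapped above (dummy values, not a real CachedRequestState), the 4-tuple appended to batch_update_builder.added is exactly the shape that update_state in the logits processor unpacks:

# Dummy values illustrating the (index, params, prompt_ids, output_ids) tuple.
req_index = 0
params = None                     # request.sampling_params or request.pooling_params
prompt_token_ids = [101, 7, 2024]  # scanned for think-start/think-end ids
output_token_ids: list[int] = []   # grows as decoding proceeds

added_entry = (req_index, params, prompt_token_ids, output_token_ids)

# update_state() later unpacks the same shape:
index, params, prompt_tok_ids, output_tok_ids = added_entry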

vllm/v1/worker/gpu_model_runner.py

Lines changed: 8 additions & 8 deletions
@@ -19,7 +19,7 @@
 from vllm.attention.backends.abstract import AttentionBackend
 from vllm.attention.layer import Attention
 from vllm.compilation.counter import compilation_counter
-from vllm.config import (CompilationLevel, VllmConfig,
+from vllm.config import (CompilationLevel, ReasoningConfig, VllmConfig,
                          get_layers_from_vllm_config)
 from vllm.distributed.eplb.eplb_state import EplbState
 from vllm.distributed.kv_transfer import (get_kv_transfer_group,
@@ -40,8 +40,10 @@
 from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
 from vllm.multimodal.utils import group_mm_inputs_by_modality
 from vllm.pooling_params import PoolingParams
+from vllm.reasoning import ReasoningParserManager
 from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
+from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
                         GiB_bytes, LazyLoader, async_tensor_h2d, cdiv,
                         check_use_alibi, get_dtype_size,
@@ -72,10 +74,6 @@
 from .utils import (gather_mm_placeholders, initialize_kv_cache_for_kv_sharing,
                     sanity_check_mm_encoder_outputs, scatter_mm_placeholders)

-from vllm.config import ReasoningConfig
-from vllm.reasoning import ReasoningParserManager
-from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
-
 if TYPE_CHECKING:
     import xgrammar as xgr
     import xgrammar.kernels.apply_token_bitmask_inplace_torch_compile as xgr_torch_compile  # noqa: E501
@@ -110,7 +108,8 @@ def __init__(
         self.prompt_adapter_config = vllm_config.prompt_adapter_config
         self.observability_config = vllm_config.observability_config

-        if self.vllm_config.decoding_config.reasoning_backend in ('deepseek_r1', 'qwen'):
+        if self.vllm_config.decoding_config.reasoning_backend in (
+                'deepseek_r1', 'qwen'):
             tokenizer = init_tokenizer_from_configs(
                 model_config=self.vllm_config.model_config,
                 scheduler_config=self.vllm_config.scheduler_config,
@@ -121,8 +120,9 @@ def __init__(
             reasoner_cls = ReasoningParserManager.get_reasoning_parser(
                 reasoning_backend)
             reasoning_parser = reasoner_cls(tokenizer=tokenizer)
-            self.vllm_config.reasoning_config = ReasoningConfig(think_start_token_id=reasoning_parser.think_start_token_id,
-                                                                think_end_token_id=reasoning_parser.think_end_token_id)
+            self.vllm_config.reasoning_config = ReasoningConfig(
+                think_start_token_id=reasoning_parser.think_start_token_id,
+                think_end_token_id=reasoning_parser.think_end_token_id)

         from vllm.model_executor.models.utils import set_cpu_offload_max_bytes
         set_cpu_offload_max_bytes(
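
The hunk above resolves the reasoning parser's think-token ids once at model-runner construction time and stores them on the config for the logits processor to use later. Below is a hedged sketch of that wiring, using an illustrative dataclass and a Hugging Face-style tokenizer lookup rather than the actual vLLM ReasoningConfig and reasoning-parser classes.

# Illustrative stand-ins only; the real ReasoningConfig and reasoning parsers
# live in vllm.config and vllm.reasoning respectively.
from dataclasses import dataclass


@dataclass
class ReasoningConfigSketch:
    think_start_token_id: int
    think_end_token_id: int


def build_reasoning_config(tokenizer) -> ReasoningConfigSketch:
    # Assumes the think markers are single special tokens in the vocabulary,
    # e.g. "<think>" / "</think>" for deepseek_r1- and qwen-style models.
    return ReasoningConfigSketch(
        think_start_token_id=tokenizer.convert_tokens_to_ids("<think>"),
        think_end_token_id=tokenizer.convert_tokens_to_ids("</think>"),
    )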
