Skip to content

Commit 0e0329d

Browse files
bigPYJ1151 authored and gmarinho2 committed
[Bugfix][CPU] Fix InputBatch for pooling models in the CPU v1 (vllm-project#20014)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
1 parent 2d7f8c3 commit 0e0329d

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

tests/models/language/pooling/test_reward.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,4 @@ def test_prm_models(
101 101
hf_output = torch.tensor(hf_output)
102 102
vllm_output = torch.tensor(vllm_output)
103 103

104-
assert torch.allclose(hf_output, vllm_output, 1e-2)
104+
assert torch.allclose(hf_output, vllm_output, 1.5e-2)

vllm/v1/worker/cpu_model_runner.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
7 7
from vllm.config import VllmConfig
8 8
from vllm.logger import init_logger
9 9
from vllm.model_executor.model_loader import get_model
from vllm.model_executor.model_loader import get_model
10+
from vllm.model_executor.models.interfaces import has_step_pooler
10 11
from vllm.v1.worker.gpu_model_runner import GPUModelRunner
11 12

12 13
logger = init_logger(__name__)
@@ -52,6 +53,9 @@ def load_model(self) -> None:
52 53
logger.info("Starting to load model %s...", self.model_config.model)
53 54
self.model = get_model(vllm_config=self.vllm_config)
54 55

56+
if has_step_pooler(self.model):
57+
self.input_batch.logits_processing_needs_token_ids = True
58+
55 59
if self.lora_config:
56 60
self.model = self.load_lora_model(self.model, self.model_config,
57 61
self.scheduler_config,

0 commit comments

Comments
 (0)