2 files changed: +5 -1

tests/models/language/pooling

@@ -101,4 +101,4 @@ def test_prm_models(
     hf_output = torch.tensor(hf_output)
     vllm_output = torch.tensor(vllm_output)
 
-    assert torch.allclose(hf_output, vllm_output, 1e-2)
+    assert torch.allclose(hf_output, vllm_output, 1.5e-2)
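
The test change only loosens the tolerance on the final comparison. The third positional argument of torch.allclose is rtol, so the assertion now accepts up to roughly a 1.5% relative difference between the HuggingFace reference scores and the vLLM scores instead of 1%. A minimal, self-contained sketch of what that buys (the tensor values here are invented purely for illustration):

```python
# torch.allclose(a, b, rtol) passes when |a - b| <= atol + rtol * |b|
# elementwise (atol defaults to 1e-8). The values below are made up to show
# a ~1.3% relative difference, which fails at rtol=1e-2 but passes at 1.5e-2.
import torch

hf_output = torch.tensor([0.8300, 0.1700])    # invented "HF reference" scores
vllm_output = torch.tensor([0.8408, 0.1722])  # invented "vLLM" scores

print(torch.allclose(hf_output, vllm_output, 1e-2))    # False
print(torch.allclose(hf_output, vllm_output, 1.5e-2))  # True
```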
@@ -7,6 +7,7 @@
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.model_executor.model_loader import get_model
+from vllm.model_executor.models.interfaces import has_step_pooler
 from vllm.v1.worker.gpu_model_runner import GPUModelRunner
 
 logger = init_logger(__name__)
@@ -52,6 +53,9 @@ def load_model(self) -> None:
         logger.info("Starting to load model %s...", self.model_config.model)
         self.model = get_model(vllm_config=self.vllm_config)
 
+        if has_step_pooler(self.model):
+            self.input_batch.logits_processing_needs_token_ids = True
+
         if self.lora_config:
             self.model = self.load_lora_model(self.model, self.model_config,
                                               self.scheduler_config,
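
The worker-side change is a capability check after model load: has_step_pooler(self.model) asks whether the loaded model exposes a step pooler, and if so the input batch is flagged so that token IDs remain available for logits processing, presumably because the step pooler needs them to locate step boundaries. As a rough illustration of this probe-the-loaded-model pattern (not vLLM's actual interfaces code; the protocol, helper, and class names below are hypothetical):

```python
# Standalone sketch of a "does this model support step pooling?" probe.
# SupportsStepPooling, has_step_pooler_sketch, and the dummy models are
# hypothetical stand-ins, not vLLM's real implementation.
from typing import Protocol, runtime_checkable


@runtime_checkable
class SupportsStepPooling(Protocol):
    """Marker interface: models that pool hidden states per reasoning step."""

    def step_pooler(self): ...


def has_step_pooler_sketch(model: object) -> bool:
    # isinstance() with a runtime-checkable Protocol only verifies that the
    # method exists, which is enough for a capability flag like this one.
    return isinstance(model, SupportsStepPooling)


class DummyStepRewardModel:
    def step_pooler(self):
        return "pool hidden states at step-boundary tokens"


class DummyCausalLM:
    pass


if __name__ == "__main__":
    print(has_step_pooler_sketch(DummyStepRewardModel()))  # True
    print(has_step_pooler_sketch(DummyCausalLM()))         # False
```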