Skip to content

Commit 5a80423

Browse files
committed
update test for V1
Signed-off-by: Linkun Chen <github@lkchen.net>
1 parent e290330 commit 5a80423

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

python/ray/llm/tests/batch/gpu/stages/test_vllm_engine_stage.py

+8 -1
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,9 @@ async def test_vllm_wrapper_semaphore(model_llama_3_2_216M):
171171
patch(
172172
"ray.llm._internal.batch.stages.vllm_engine_stage.vLLMEngineWrapper.generate_async_v0"
173173
) as mock_generate_async_v0,
174+
patch(
175+
"ray.llm._internal.batch.stages.vllm_engine_stage.vLLMEngineWrapper.generate_async_v1"
176+
) as mock_generate_async_v1,
174177
):
175178
mock_engine.from_engine_args.return_value = AsyncMock()
176179
num_running_requests = 0
@@ -207,6 +210,7 @@ async def mock_generate(request):
207210
)
208211

209212
mock_generate_async_v0.side_effect = mock_generate
213+
mock_generate_async_v1.side_effect = mock_generate
210214

211215
# Create wrapper with max 2 pending requests
212216
wrapper = vLLMEngineWrapper(
@@ -227,7 +231,10 @@ async def mock_generate(request):
227231
await asyncio.gather(*tasks)
228232

229233
# Verify all requests were processed
230-
assert mock_generate_async_v0.call_count == 10
234+
assert (
235+
mock_generate_async_v0.call_count == 10
236+
or mock_generate_async_v1.call_count == 10
237+
)
231238

232239

233240
@pytest.mark.asyncio

0 commit comments

Comments (0)