Skip to content

Commit 7b1895e

Browse files
authored
[CI Fix] Try fixing eagle e2e test OOM by reducing block allocation (#20213)
Signed-off-by: mgoin <mgoin64@gmail.com>
1 parent 4d36693 commit 7b1895e

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

tests/spec_decode/e2e/test_eagle_correctness.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -370,6 +370,10 @@ def test_llama2_eagle_e2e_greedy_correctness(vllm_runner, common_llm_kwargs,
370370
@pytest.mark.parametrize(
371371
"common_llm_kwargs",
372372
[{
373+
# 2 blocks for the small prompt, 256 // 16 blocks for the generated tokens (16 tokens per block).
374+
"num_gpu_blocks_override": 2 + 256 // 16,
375+
"max_model_len": (2 + 256 // 16) * 16,
376+
373377
# Skip cuda graph recording for fast test.
374378
"enforce_eager": True,
375379
@@ -420,6 +424,10 @@ def test_llama3_eagle_e2e_greedy_correctness(vllm_runner, common_llm_kwargs,
420424
@pytest.mark.parametrize(
421425
"common_llm_kwargs",
422426
[{
427+
# 2 blocks for the small prompt, 256 // 16 blocks for the generated tokens (16 tokens per block).
428+
"num_gpu_blocks_override": 2 + 256 // 16,
429+
"max_model_len": (2 + 256 // 16) * 16,
430+
423431
# Skip cuda graph recording for fast test.
424432
"enforce_eager": True,
425433

0 commit comments

Comments
 (0)