Skip to content

Commit 7efa4e9

Browse files
authored
[CI] Fix oom in chunk prefill (#1622)
### What this PR does / why we need it?
Add resource-clearing logic to fix the OOM issue when testing `tests/e2e/singlecard/core/ascend_scheduler`.

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with existing test.

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent c58accc commit 7efa4e9

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
import gc
34
import os
45

56
import pytest
7+
import torch
68
from vllm import LLM
79

810
if os.getenv("VLLM_USE_V1", "0") != "1":
@@ -13,8 +15,8 @@
1315

1416

1517
@pytest.fixture(scope="module")
16-
def model() -> LLM:
17-
return LLM(
18+
def model():
19+
llm = LLM(
1820
MODEL,
1921
enforce_eager=True,
2022
enable_prefix_caching=True,
@@ -23,6 +25,10 @@ def model() -> LLM:
2325
additional_config={"ascend_scheduler_config": {
2426
"enabled": True,
2527
}})
28+
yield llm
29+
del llm
30+
torch.npu.empty_cache()
31+
gc.collect()
2632

2733

2834
def test_concurrent_partial_prefill(model):
@@ -37,4 +43,4 @@ def test_prefix_cache_stats_is_recorded(model):
3743
input_tokens = {"prompt_token_ids": [101] * 129}
3844
_ = model.generate([input_tokens])
3945
outputs = model.generate([input_tokens])
40-
assert outputs[0].num_cached_tokens == 128
46+
assert outputs[0].num_cached_tokens == 128

tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
]
1818

1919

20-
@pytest.mark.skipif(True, reason="oom in 910B4, fix me please")
2120
@pytest.mark.parametrize("model", MODELS)
2221
@pytest.mark.parametrize("max_tokens",
2322
[4]) # cannot align results when max_tokens > 4

0 commit comments

Comments (0)