Skip to content

Commit 7efa4e9

Browse files
authored
[CI] Fix oom in chunk prefill (#1622)
### What this PR does / why we need it?
Add resource-clearing logic to fix the OOM issue when testing `tests/e2e/singlecard/core/ascend_scheduler`.

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with existing test.

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent c58accc commit 7efa4e9

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
import gc
34
import os
45

56
import pytest
7+
import torch
68
from vllm import LLM
79

810
if os.getenv("VLLM_USE_V1", "0") != "1":
@@ -13,8 +15,8 @@
1315

1416

1517
@pytest.fixture(scope="module")
16-
def model() -> LLM:
17-
return LLM(
18+
def model():
19+
llm = LLM(
1820
MODEL,
1921
enforce_eager=True,
2022
enable_prefix_caching=True,
@@ -23,6 +25,10 @@ def model() -> LLM:
2325
additional_config={"ascend_scheduler_config": {
2426
"enabled": True,
2527
}})
28+
yield llm
29+
del llm
30+
torch.npu.empty_cache()
31+
gc.collect()
2632

2733

2834
def test_concurrent_partial_prefill(model):
@@ -37,4 +43,4 @@ def test_prefix_cache_stats_is_recorded(model):
3743
input_tokens = {"prompt_token_ids": [101] * 129}
3844
_ = model.generate([input_tokens])
3945
outputs = model.generate([input_tokens])
40-
assert outputs[0].num_cached_tokens == 128
46+
assert outputs[0].num_cached_tokens == 128

tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
]
1818

1919

20-
@pytest.mark.skipif(True, reason="oom in 910B4, fix me please")
2120
@pytest.mark.parametrize("model", MODELS)
2221
@pytest.mark.parametrize("max_tokens",
2322
[4]) # cannot align results when max_tokens > 4

0 commit comments

Comments (0)