Commit 3c16d52

fix aclgraph enable way

Signed-off-by: MengqingCao <cmq0113@163.com>

1 parent: c25721a
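
In plain terms, the commit changes how ACL graph mode is switched on: the additional_config={'enable_aclgraph': True} opt-in is removed, ACL graph becomes the default on the V1 engine, and enforce_eager=True is the opt-out. A minimal usage sketch of the new behavior (the model name is taken from the test below; this mirrors the diff, not separate documentation):

    from vllm import LLM

    # ACL graph is now the default on the V1 engine; no additional_config needed.
    llm = LLM("Qwen/Qwen2.5-0.5B-Instruct")

    # Opt out of graph capture by forcing eager execution.
    llm_eager = LLM("Qwen/Qwen2.5-0.5B-Instruct", enforce_eager=True)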

File tree: 2 files changed, +38 −34 lines

tests/compile/test_aclgraph.py (21 additions, 7 deletions)
@@ -20,16 +20,21 @@
 Run `pytest tests/compile/test_aclgraph.py`.
 """
+import os
+
 import pytest
 import torch
 from vllm import LLM, SamplingParams
 
+from tests.conftest import VllmRunner
 from tests.model_utils import check_outputs_equal
 from vllm_ascend.utils import vllm_version_is
 
 MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"]
 
 
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
+                    reason="aclgraph only support on v1")
 @pytest.mark.skipif(
     (vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1")),
     reason="aclgraph not supported in v0.8.5 and v0.8.5.post1")
@@ -53,17 +58,12 @@ def test_models(
                                      temperature=0.0)
     # TODO: change to use vllmrunner when the registry of custom op is solved
     # while running pytest
-    vllm_model = LLM(
-        model,
-        additional_config={
-            'enable_aclgraph': True,
-        },
-    )
+    vllm_model = LLM(model)
     vllm_aclgraph_outputs = vllm_model.generate(prompts, sampling_params)
     del vllm_model
     torch.npu.empty_cache()
 
-    vllm_model = LLM(model, )
+    vllm_model = LLM(model, enforce_eager=True)
     vllm_eager_outputs = vllm_model.generate(prompts, sampling_params)
     del vllm_model
     torch.npu.empty_cache()
@@ -84,3 +84,17 @@ def test_models(
         name_0="vllm_eager_outputs",
         name_1="vllm_aclgraph_outputs",
     )
+
+
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
+                    reason="aclgraph only support on v1")
+@pytest.mark.skipif(
+    (vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1")),
+    reason="aclgraph not supported in v0.8.5 and v0.8.5.post1")
+def test_deepseek_raises_error(monkeypatch: pytest.MonkeyPatch) -> None:
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_MODELSCOPE", "True")
+        m.setenv("VLLM_USE_V1", "1")
+        with pytest.raises(NotImplementedError) as excinfo:
+            VllmRunner("deepseek-ai/DeepSeek-V2-Lite-Chat", max_model_len=1024)
+        assert "ACL Graph does not support deepseek" in str(excinfo.value)

vllm_ascend/platform.py (17 additions, 27 deletions)
@@ -119,24 +119,14 @@ def mem_get_info(cls) -> Tuple[int, int]:
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         from vllm.config import CompilationLevel  # noqa: E402
         compilation_config = vllm_config.compilation_config
+        model_config = vllm_config.model_config
 
-        if vllm_config.model_config is None:
+        if model_config is None:
             logger.warning("Model config is missing. This may indicate "
                            "that we are running a test case")
             enforce_eager = False
         else:
-            enforce_eager = getattr(vllm_config.model_config, "enforce_eager",
-                                    False)
-        # TODO: revert this modification on compilation_config.level
-        # when aclgraph is fully supported
-        compilation_config.level = CompilationLevel.NO_COMPILATION
-        logger.warning(
-            "ACL Graph mode is currently experimental and disabled "
-            "by default. 1. Adopt additional_config={'enable_aclgraph': True} to try"
-            " with aclgraph on V1 engine to serve dense models. "
-            "2. Adopt additional_config={'enable_graph_mode': True} "
-            "to serve deepseek models with NPU graph mode on vllm-ascend with V0 engine."
-        )
+            enforce_eager = getattr(model_config, "enforce_eager", False)
 
         if vllm_config.additional_config is not None:
             enable_graph_mode = vllm_config.additional_config.get(
@@ -151,20 +141,20 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                     "NPU graph mode is still experimental and not supported for V1 without mla currently, "
                     "it has been disabled automatically.")
                 vllm_config.additional_config["enable_graph_mode"] = False
-
-            enable_aclgraph = vllm_config.additional_config.get(
-                "enable_aclgraph", False)
-            if enable_aclgraph:
-                if envs.VLLM_USE_V1:
-                    logger.info(
-                        "Enabling ACL graph mode, note it is still experimental currently, "
-                        "raise issue on https://github.com/vllm-project/vllm-ascend/issues if needed."
-                    )
-                    compilation_config.level = CompilationLevel.PIECEWISE
-                else:
-                    logger.warning(
-                        "ACL graph mode is only support on V1 engine. "
-                        "Disabling it as now running on V0 engine.")
+        elif envs.VLLM_USE_V1 and not enforce_eager:
+            model_type = model_config.hf_config.model_type
+            if "deepseek" in model_type:
+                raise NotImplementedError(
+                    "ACL Graph does not support deepseek. Please "
+                    "adopt additional_config={'enable_graph_mode': True} "
+                    "to serve deepseek models with NPU graph mode on vllm-ascend with V0 engine."
+                    " Or set `enforce_eager=True` to use eager mode on V1 engine."
+                )
+            elif "qwen" not in model_type:
+                logger.warning(
+                    "ACL Graph is currently experimental. Please "
+                    "raise an issue on https://github.com/vllm-project/vllm-ascend/issues"
+                    " if you encourage any Error")
 
         if enforce_eager or compilation_config.level == CompilationLevel.NO_COMPILATION:
             logger.info("Compilation disabled, using eager mode by default")
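
To make the new gating easier to follow, here is an illustrative distillation of the branch above as a standalone function. aclgraph_mode, model_type, use_v1, and enforce_eager are hypothetical stand-ins for model_config.hf_config.model_type, envs.VLLM_USE_V1, and the model config's enforce_eager flag; as in the diff, this path only runs when no additional_config is supplied:

    def aclgraph_mode(model_type: str, use_v1: bool, enforce_eager: bool) -> str:
        """Illustrative sketch of the ACL graph gating introduced above."""
        if not use_v1 or enforce_eager:
            # V0 engine, or eager requested: compilation disabled, eager mode.
            return "eager"
        if "deepseek" in model_type:
            # Mirrors the NotImplementedError raised in check_and_update_config.
            raise NotImplementedError("ACL Graph does not support deepseek")
        if "qwen" not in model_type:
            # Non-qwen models keep ACL graph on, but support is experimental
            # and a warning is logged.
            return "aclgraph (experimental, warning logged)"
        return "aclgraph"

    # qwen models get ACL graph by default on V1 ...
    assert aclgraph_mode("qwen2", use_v1=True, enforce_eager=False) == "aclgraph"
    # ... and enforce_eager=True opts any model out.
    assert aclgraph_mode("deepseek_v2", use_v1=True, enforce_eager=True) == "eager"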
