Commit 3c16d52

fix aclgraph enable way

Signed-off-by: MengqingCao <cmq0113@163.com>

1 parent: c25721a
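
In plain terms, the commit changes how ACL graph mode is switched on: the additional_config={'enable_aclgraph': True} opt-in is removed, ACL graph becomes the default on the V1 engine, and enforce_eager=True is the opt-out. A minimal usage sketch of the new behavior (the model name is taken from the test below; this mirrors the diff, not separate documentation):

    from vllm import LLM

    # ACL graph is now the default on the V1 engine; no additional_config needed.
    llm = LLM("Qwen/Qwen2.5-0.5B-Instruct")

    # Opt out of graph capture by forcing eager execution.
    llm_eager = LLM("Qwen/Qwen2.5-0.5B-Instruct", enforce_eager=True)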

File tree: 2 files changed, +38 −34 lines

tests/compile/test_aclgraph.py (21 additions, 7 deletions)
@@ -20,16 +20,21 @@
 Run `pytest tests/compile/test_aclgraph.py`.
 """
+import os
+
 import pytest
 import torch
 from vllm import LLM, SamplingParams
 
+from tests.conftest import VllmRunner
 from tests.model_utils import check_outputs_equal
 from vllm_ascend.utils import vllm_version_is
 
 MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"]
 
 
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
+                    reason="aclgraph only support on v1")
 @pytest.mark.skipif(
     (vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1")),
     reason="aclgraph not supported in v0.8.5 and v0.8.5.post1")
@@ -53,17 +58,12 @@ def test_models(
                                      temperature=0.0)
     # TODO: change to use vllmrunner when the registry of custom op is solved
     # while running pytest
-    vllm_model = LLM(
-        model,
-        additional_config={
-            'enable_aclgraph': True,
-        },
-    )
+    vllm_model = LLM(model)
     vllm_aclgraph_outputs = vllm_model.generate(prompts, sampling_params)
     del vllm_model
     torch.npu.empty_cache()
 
-    vllm_model = LLM(model, )
+    vllm_model = LLM(model, enforce_eager=True)
     vllm_eager_outputs = vllm_model.generate(prompts, sampling_params)
     del vllm_model
     torch.npu.empty_cache()
@@ -84,3 +84,17 @@ def test_models(
         name_0="vllm_eager_outputs",
         name_1="vllm_aclgraph_outputs",
     )
+
+
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
+                    reason="aclgraph only support on v1")
+@pytest.mark.skipif(
+    (vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1")),
+    reason="aclgraph not supported in v0.8.5 and v0.8.5.post1")
+def test_deepseek_raises_error(monkeypatch: pytest.MonkeyPatch) -> None:
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_MODELSCOPE", "True")
+        m.setenv("VLLM_USE_V1", "1")
+        with pytest.raises(NotImplementedError) as excinfo:
+            VllmRunner("deepseek-ai/DeepSeek-V2-Lite-Chat", max_model_len=1024)
+        assert "ACL Graph does not support deepseek" in str(excinfo.value)

vllm_ascend/platform.py (17 additions, 27 deletions)
@@ -119,24 +119,14 @@ def mem_get_info(cls) -> Tuple[int, int]:
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         from vllm.config import CompilationLevel  # noqa: E402
         compilation_config = vllm_config.compilation_config
+        model_config = vllm_config.model_config
 
-        if vllm_config.model_config is None:
+        if model_config is None:
             logger.warning("Model config is missing. This may indicate "
                            "that we are running a test case")
             enforce_eager = False
         else:
-            enforce_eager = getattr(vllm_config.model_config, "enforce_eager",
-                                    False)
-        # TODO: revert this modification on compilation_config.level
-        # when aclgraph is fully supported
-        compilation_config.level = CompilationLevel.NO_COMPILATION
-        logger.warning(
-            "ACL Graph mode is currently experimental and disabled "
-            "by default. 1. Adopt additional_config={'enable_aclgraph': True} to try"
-            " with aclgraph on V1 engine to serve dense models. "
-            "2. Adopt additional_config={'enable_graph_mode': True} "
-            "to serve deepseek models with NPU graph mode on vllm-ascend with V0 engine."
-        )
+            enforce_eager = getattr(model_config, "enforce_eager", False)
 
         if vllm_config.additional_config is not None:
             enable_graph_mode = vllm_config.additional_config.get(
@@ -151,20 +141,20 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                     "NPU graph mode is still experimental and not supported for V1 without mla currently, "
                     "it has been disabled automatically.")
                 vllm_config.additional_config["enable_graph_mode"] = False
-
-            enable_aclgraph = vllm_config.additional_config.get(
-                "enable_aclgraph", False)
-            if enable_aclgraph:
-                if envs.VLLM_USE_V1:
-                    logger.info(
-                        "Enabling ACL graph mode, note it is still experimental currently, "
-                        "raise issue on https://github.com/vllm-project/vllm-ascend/issues if needed."
-                    )
-                    compilation_config.level = CompilationLevel.PIECEWISE
-                else:
-                    logger.warning(
-                        "ACL graph mode is only support on V1 engine. "
-                        "Disabling it as now running on V0 engine.")
+        elif envs.VLLM_USE_V1 and not enforce_eager:
+            model_type = model_config.hf_config.model_type
+            if "deepseek" in model_type:
+                raise NotImplementedError(
+                    "ACL Graph does not support deepseek. Please "
+                    "adopt additional_config={'enable_graph_mode': True} "
+                    "to serve deepseek models with NPU graph mode on vllm-ascend with V0 engine."
+                    " Or set `enforce_eager=True` to use eager mode on V1 engine."
+                )
+            elif "qwen" not in model_type:
+                logger.warning(
+                    "ACL Graph is currently experimental. Please "
+                    "raise an issue on https://github.com/vllm-project/vllm-ascend/issues"
+                    " if you encourage any Error")
 
         if enforce_eager or compilation_config.level == CompilationLevel.NO_COMPILATION:
             logger.info("Compilation disabled, using eager mode by default")
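
To make the new gating easier to follow, here is an illustrative distillation of the branch above as a standalone function. aclgraph_mode, model_type, use_v1, and enforce_eager are hypothetical stand-ins for model_config.hf_config.model_type, envs.VLLM_USE_V1, and the model config's enforce_eager flag; as in the diff, this path only runs when no additional_config is supplied:

    def aclgraph_mode(model_type: str, use_v1: bool, enforce_eager: bool) -> str:
        """Illustrative sketch of the ACL graph gating introduced above."""
        if not use_v1 or enforce_eager:
            # V0 engine, or eager requested: compilation disabled, eager mode.
            return "eager"
        if "deepseek" in model_type:
            # Mirrors the NotImplementedError raised in check_and_update_config.
            raise NotImplementedError("ACL Graph does not support deepseek")
        if "qwen" not in model_type:
            # Non-qwen models keep ACL graph on, but support is experimental
            # and a warning is logged.
            return "aclgraph (experimental, warning logged)"
        return "aclgraph"

    # qwen models get ACL graph by default on V1 ...
    assert aclgraph_mode("qwen2", use_v1=True, enforce_eager=False) == "aclgraph"
    # ... and enforce_eager=True opts any model out.
    assert aclgraph_mode("deepseek_v2", use_v1=True, enforce_eager=True) == "eager"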
