Follow vllm-project/vllm@eaa2e51

Yikun · Yikun · commit a18d381318f2 · 2025-06-08T20:02:27.000+08:00
Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
diff --git a/tests/singlecard/compile/test_simple.py b/tests/singlecard/compile/test_simple.py
@@ -14,6 +14,8 @@
                          set_current_vllm_config)
 from vllm.utils import direct_register_custom_op
 
+from vllm_ascend.utils import vllm_version_is
+
 global_counter = 0
 
 # create a library to hold the custom op
@@ -92,14 +94,24 @@ def test_simple_piecewise_compile():
 
     inputs = torch.randn(100).npu()
 
-    with compilation_counter.expect(
-            num_graphs_seen=1,  # one graph for the model
-            num_piecewise_graphs_seen=5,  # 2 * num_layers + 1
-            num_piecewise_capturable_graphs_seen=3,  # 1 + num_layers
-            num_backend_compilations=3,  # num_piecewise_capturable_graphs_seen
-            num_cudagraph_caputured=
-            6,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
-    ):
+    if vllm_version_is("0.9.0"):
+        kwargs = {
+            "num_graphs_seen": 1,  # one graph for the model
+            "num_piecewise_graphs_seen": 5,  # 2 * num_layers + 1
+            "num_piecewise_capturable_graphs_seen": 3,  # 1 + num_layers
+            "num_backend_compilations": 3,  # num_piecewise_capturable_graphs_seen
+            "num_cudagraph_caputured": 6  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        }
+    else:
+        kwargs = {
+            "num_graphs_seen": 1,  # one graph for the model
+            "num_piecewise_graphs_seen": 5,  # 2 * num_layers + 1
+            "num_piecewise_capturable_graphs_seen": 3,  # 1 + num_layers
+            "num_backend_compilations": 3,  # num_piecewise_capturable_graphs_seen
+            "num_cudagraph_captured": 6  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        }
+
+    with compilation_counter.expect(**kwargs):  # expect() takes keyword args
 
         model(inputs)
 
diff --git a/vllm_ascend/compilation/piecewise_backend.py b/vllm_ascend/compilation/piecewise_backend.py
@@ -31,6 +31,8 @@
 from vllm.logger import logger
 from vllm.utils import weak_ref_tensors
 
+from vllm_ascend.utils import vllm_version_is
+
 
 @dataclasses.dataclass
 class ConcreteSizeEntry:
@@ -205,7 +207,10 @@ def __call__(self, *args) -> Any:
             entry.output = weak_ref_tensors(output)
             entry.aclgraph = aclgraph
 
-            compilation_counter.num_cudagraph_caputured += 1
+            if vllm_version_is("0.9.0"):
+                compilation_counter.num_cudagraph_caputured += 1
+            else:
+                compilation_counter.num_cudagraph_captured += 1
 
             # important: we need to return the output, rather than
             # the weak ref of the output, so that pytorch can correctly