|
14 | 14 | set_current_vllm_config)
|
15 | 15 | from vllm.utils import direct_register_custom_op
|
16 | 16 |
|
| 17 | +from vllm_ascend.utils import vllm_version_is |
| 18 | + |
17 | 19 | global_counter = 0
|
18 | 20 |
|
19 | 21 | # create a library to hold the custom op
|
@@ -92,14 +94,28 @@ def test_simple_piecewise_compile():
|
92 | 94 |
|
93 | 95 | inputs = torch.randn(100).npu()
|
94 | 96 |
|
95 |
| - with compilation_counter.expect( |
96 |
| - num_graphs_seen=1, # one graph for the model |
97 |
| - num_piecewise_graphs_seen=5, # 2 * num_layers + 1 |
98 |
| - num_piecewise_capturable_graphs_seen=3, # 1 + num_layers |
99 |
| - num_backend_compilations=3, # num_piecewise_capturable_graphs_seen |
100 |
| - num_cudagraph_caputured= |
101 |
| - 6, # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen |
102 |
| - ): |
| 97 | + if vllm_version_is("0.9.0"): |
| 98 | + kwargs = { |
| 99 | + "num_graphs_seen": 1, # one graph for the model |
| 100 | + "num_piecewise_graphs_seen": 5, # 2 * num_layers + 1 |
| 101 | + "num_piecewise_capturable_graphs_seen": 3, # 1 + num_layers |
| 102 | + "num_backend_compilations": |
| 103 | + 3, # num_piecewise_capturable_graphs_seen |
| 104 | + "num_cudagraph_caputured": |
| 105 | + 6 # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen |
| 106 | + } |
| 107 | + else: |
| 108 | + kwargs = { |
| 109 | + "num_graphs_seen": 1, # one graph for the model |
| 110 | + "num_piecewise_graphs_seen": 5, # 2 * num_layers + 1 |
| 111 | + "num_piecewise_capturable_graphs_seen": 3, # 1 + num_layers |
| 112 | + "num_backend_compilations": |
| 113 | + 3, # num_piecewise_capturable_graphs_seen |
| 114 | + "num_cudagraph_captured": |
| 115 | + 6 # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen |
| 116 | + } |
| 117 | + |
| 118 | + with compilation_counter.expect(**kwargs): |
103 | 119 |
|
104 | 120 | model(inputs)
|
105 | 121 |
|
|
0 commit comments