
Commit 7bdada5

[Misc] Remove VLLM_USE_V1 usage in code (#1764)

We plan to remove the V0 code path starting from this version; the first step is to delete all VLLM_USE_V1 usage from the code.

Related: #1620
- vLLM version: v0.9.2
- vLLM main: vllm-project/vllm@61e2082

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent 494b0f4 commit 7bdada5
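For context, every change below removes the same runtime gate. A minimal before/after sketch of that pattern (illustrative only; `run_v1_path` is a placeholder, not a real vllm-ascend function):

import vllm.envs as envs


def run_v1_path(cfg):
    ...  # stand-in for whatever the V1-only code path of a given module does


# Before this commit: behaviour was branched on VLLM_USE_V1 at runtime.
def do_work_before(cfg):
    if not envs.VLLM_USE_V1:  # V0 engine path
        raise NotImplementedError("Only supported for V1 Engine.")
    run_v1_path(cfg)


# After this commit: V1 is the only engine, so the guard and the
# `import vllm.envs as envs` it required are deleted.
def do_work_after(cfg):
    run_v1_path(cfg)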

6 files changed, +101 -218 lines changed

tests/ut/test_ascend_config.py

Lines changed: 42 additions & 65 deletions
@@ -193,71 +193,48 @@ def test_check_ascend_config_pass(self):
     @_clean_up_ascend_config
     def test_check_ascend_config_wrong_case(self):
         test_vllm_config = VllmConfig()
-        # For V0 engine
-        with mock.patch.dict(os.environ, {"VLLM_USE_V1": "0"}):
-            with self.assertRaises(NotImplementedError):
-                test_vllm_config.additional_config = {
-                    "torchair_graph_config": {
-                        "enabled": True,
-                    },
-                    "refresh": True
-                }
-                init_ascend_config(test_vllm_config)
-                check_ascend_config(test_vllm_config, False)
-            with self.assertRaises(NotImplementedError):
-                test_vllm_config.additional_config = {
-                    "ascend_scheduler_config": {
-                        "enabled": True,
-                    },
-                    "refresh": True
-                }
-                init_ascend_config(test_vllm_config)
-                check_ascend_config(test_vllm_config, True)
-        # For V1 engine
-        with mock.patch.dict(os.environ, {"VLLM_USE_V1": "1"}):
-            # torchair + eager mode
-            with self.assertRaises(RuntimeError):
-                test_vllm_config.additional_config = {
-                    "torchair_graph_config": {
-                        "enabled": True,
-                    },
-                    "refresh": True
-                }
-                init_ascend_config(test_vllm_config)
-                enforce_eager = True
-                check_ascend_config(test_vllm_config, enforce_eager)
-            # torchair + non deepseek model
-            with self.assertRaises(NotImplementedError):
-                test_vllm_config.additional_config = {
-                    "torchair_graph_config": {
-                        "enabled": True,
-                    },
-                    "refresh": True
-                }
-                model_path = os.path.join(os.path.dirname(__file__),
-                                          "fake_weight")
-                fake_model_config = ModelConfig(model=model_path)
-                fake_model_config.hf_config = PretrainedConfig()
-                fake_model_config.hf_config.model_type = "llama"
-                test_vllm_config.model_config = fake_model_config
-                init_ascend_config(test_vllm_config)
-                check_ascend_config(test_vllm_config, False)
-            # aclgraph + deepseek model
-            with self.assertRaises(NotImplementedError):
-                test_vllm_config.additional_config = {
-                    "torchair_graph_config": {
-                        "enabled": False,
-                    },
-                    "refresh": True
-                }
-                model_path = os.path.join(os.path.dirname(__file__),
-                                          "fake_weight")
-                fake_model_config = ModelConfig(model=model_path)
-                fake_model_config.hf_config = PretrainedConfig()
-                fake_model_config.hf_config.model_type = "deepseek"
-                test_vllm_config.model_config = fake_model_config
-                init_ascend_config(test_vllm_config)
-                check_ascend_config(test_vllm_config, False)
+
+        # torchair + eager mode
+        with self.assertRaises(RuntimeError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": True,
+                },
+                "refresh": True
+            }
+            init_ascend_config(test_vllm_config)
+            enforce_eager = True
+            check_ascend_config(test_vllm_config, enforce_eager)
+        # torchair + non deepseek model
+        with self.assertRaises(NotImplementedError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": True,
+                },
+                "refresh": True
+            }
+            model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
+            fake_model_config = ModelConfig(model=model_path)
+            fake_model_config.hf_config = PretrainedConfig()
+            fake_model_config.hf_config.model_type = "llama"
+            test_vllm_config.model_config = fake_model_config
+            init_ascend_config(test_vllm_config)
+            check_ascend_config(test_vllm_config, False)
+        # aclgraph + deepseek model
+        with self.assertRaises(NotImplementedError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": False,
+                },
+                "refresh": True
+            }
+            model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
+            fake_model_config = ModelConfig(model=model_path)
+            fake_model_config.hf_config = PretrainedConfig()
+            fake_model_config.hf_config.model_type = "deepseek"
+            test_vllm_config.model_config = fake_model_config
+            init_ascend_config(test_vllm_config)
+            check_ascend_config(test_vllm_config, False)
 
     def test_check_torchair_supported(self):
         test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
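The surviving scenarios mirror how users pass these options in practice. A minimal sketch, assuming the `additional_config` plumbing documented for vllm-ascend and an illustrative model name:

from vllm import LLM

# Torchair graph mode for a DeepSeek-family model. enforce_eager must stay
# False here, otherwise check_ascend_config raises the RuntimeError asserted
# in the "torchair + eager mode" case above.
llm = LLM(
    model="deepseek-ai/DeepSeek-V2-Lite",
    enforce_eager=False,
    additional_config={
        "torchair_graph_config": {
            "enabled": True,
        },
    },
)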

tests/ut/test_platform.py

Lines changed: 0 additions & 63 deletions
@@ -389,69 +389,6 @@ def test_check_and_update_config_v1_worker_class_selection(
             "vllm_ascend.worker.worker_v1.NPUWorker",
         )
 
-    @patch("vllm_ascend.ascend_config.check_ascend_config")
-    @patch("vllm_ascend.ascend_config.init_ascend_config")
-    @patch("vllm.envs.VLLM_USE_V1", False)
-    def test_check_and_update_config_speculative_worker_config(
-            self, mock_init_ascend, mock_check_ascend):
-        mock_init_ascend.return_value = self.mock_ascend_config
-        self.mock_vllm_config.speculative_config = MagicMock()
-        self.mock_vllm_config.speculative_config.disable_logprobs = True
-        self.mock_vllm_config.parallel_config.worker_cls = "auto"
-
-        with patch.dict("os.environ", {}):
-            from vllm_ascend import platform
-
-            importlib.reload(platform)
-            self.platform.check_and_update_config(self.mock_vllm_config)
-            import os
-
-            self.assertEqual(os.environ.get("ACL_OP_INIT_MODE"), "1")
-        self.assertEqual(
-            self.mock_vllm_config.parallel_config.worker_cls,
-            "vllm.spec_decode.spec_decode_worker.create_spec_worker",
-        )
-        self.assertEqual(
-            self.mock_vllm_config.parallel_config.sd_worker_cls,
-            "vllm_ascend.worker.worker.NPUWorker",
-        )
-
-    @patch("vllm_ascend.ascend_config.check_ascend_config")
-    @patch("vllm_ascend.ascend_config.init_ascend_config")
-    @patch("vllm.envs.VLLM_USE_V1", False)
-    def test_check_and_update_config_multi_step_worker_config(
-            self, mock_init_ascend, mock_check_ascend):
-        mock_init_ascend.return_value = self.mock_ascend_config
-        self.mock_vllm_config.scheduler_config.is_multi_step = True
-        self.mock_vllm_config.parallel_config.worker_cls = "auto"
-
-        from vllm_ascend import platform
-
-        importlib.reload(platform)
-        self.platform.check_and_update_config(self.mock_vllm_config)
-        self.assertEqual(
-            self.mock_vllm_config.parallel_config.worker_cls,
-            "vllm_ascend.worker.multi_step_worker.MultiStepWorker",
-        )
-
-    @patch("vllm_ascend.ascend_config.check_ascend_config")
-    @patch("vllm_ascend.ascend_config.init_ascend_config")
-    @patch("vllm.envs.VLLM_USE_V1", False)
-    def test_check_and_update_config_default_worker_config(
-            self, mock_init_ascend, mock_check_ascend):
-        mock_init_ascend.return_value = self.mock_ascend_config
-        self.mock_vllm_config.parallel_config.worker_cls = "auto"
-        self.mock_vllm_config.scheduler_config.is_multi_step = False
-
-        from vllm_ascend import platform
-
-        importlib.reload(platform)
-        self.platform.check_and_update_config(self.mock_vllm_config)
-        self.assertEqual(
-            self.mock_vllm_config.parallel_config.worker_cls,
-            "vllm_ascend.worker.worker.NPUWorker",
-        )
-
     @patch("vllm_ascend.ascend_config.check_ascend_config")
     @patch("vllm_ascend.ascend_config.init_ascend_config")
     @patch("vllm_ascend.utils.is_310p", return_value=True)

vllm_ascend/ascend_config.py

Lines changed: 30 additions & 41 deletions
@@ -15,7 +15,6 @@
 # limitations under the License.
 from typing import Optional
 
-import vllm.envs as envs
 from vllm.logger import logger
 
 TORCHAIR_MODEL_LIST = ["deepseek", "pangu"]
@@ -126,46 +125,36 @@ def get_ascend_config():
 def check_ascend_config(vllm_config, enforce_eager):
     ascend_config = get_ascend_config()
 
-    # for v0 engine
-    if not envs.VLLM_USE_V1:
+    # for eager mode
+    if enforce_eager:
+        # torchair_graph cannot be enabled with eager mode.
         if ascend_config.torchair_graph_config.enabled:
-            raise NotImplementedError(
-                "Torchair graph mode is only supported for V1 Engine.")
-        if ascend_config.ascend_scheduler_config.enabled:
-            raise NotImplementedError(
-                "Ascend scheduler is only supported for V1 Engine.")
-    # for v1 engine
+            raise RuntimeError(
+                "Can't enable graph mode and eager mode at the same time. Please set `enforce_eager=False` if you attempt to enable NPU graph mode."
+            )
+    # for graph mode
     else:
-        # for eager mode
-        if enforce_eager:
-            # torchair_graph cannot be enabled with eager mode.
-            if ascend_config.torchair_graph_config.enabled:
-                raise RuntimeError(
-                    "Can't enable graph mode and eager mode at the same time. Please set `enforce_eager=False` if you attempt to enable NPU graph mode."
-                )
-        # for graph mode
+        # torchair_graph case
+        if ascend_config.torchair_graph_config.enabled:
+            # torchair_graph is supported for deepseek/pangu model only.
+            if vllm_config.model_config:
+                model_type = vllm_config.model_config.hf_config.model_type
+                if not _check_torchair_supported(model_type):
+                    raise NotImplementedError(
+                        "Torchair graph mode only works with following model types:"
+                        f"{TORCHAIR_MODEL_LIST}.")
+        # aclgraph case
         else:
-            # torchair_graph case
-            if ascend_config.torchair_graph_config.enabled:
-                # torchair_graph is supported for deepseek/pangu model only.
-                if vllm_config.model_config:
-                    model_type = vllm_config.model_config.hf_config.model_type
-                    if not _check_torchair_supported(model_type):
-                        raise NotImplementedError(
-                            "Torchair graph mode only works with following model types:"
-                            f"{TORCHAIR_MODEL_LIST}.")
-            # aclgraph case
-            else:
-                # aclgraph doesn't work with deepseek model and only qwen model is well tested.
-                if vllm_config.model_config:
-                    model_type = vllm_config.model_config.hf_config.model_type
-                    if "deepseek" in model_type:
-                        raise NotImplementedError(
-                            "ACL Graph does not support deepseek. Please "
-                            "try torchair graph mode to serve deepseek models on vllm-ascend."
-                            " Or set `enforce_eager=True` to use eager mode.")
-                    if "qwen" not in model_type:
-                        logger.warning(
-                            "ACL Graph is currently experimental. Please "
-                            "raise an issue on https://github.com/vllm-project/vllm-ascend/issues"
-                            " if you encourage any Error")
+            # aclgraph doesn't work with deepseek model and only qwen model is well tested.
+            if vllm_config.model_config:
+                model_type = vllm_config.model_config.hf_config.model_type
+                if "deepseek" in model_type:
+                    raise NotImplementedError(
+                        "ACL Graph does not support deepseek. Please "
+                        "try torchair graph mode to serve deepseek models on vllm-ascend."
+                        " Or set `enforce_eager=True` to use eager mode.")
+                if "qwen" not in model_type:
+                    logger.warning(
+                        "ACL Graph is currently experimental. Please "
+                        "raise an issue on https://github.com/vllm-project/vllm-ascend/issues"
+                        " if you encourage any Error")

vllm_ascend/models/deepseek_dbo.py

Lines changed: 7 additions & 11 deletions
@@ -30,7 +30,6 @@
 import torch
 import torch.distributed as dist
 import torch_npu  # noqa: F401
-import vllm.envs as envs
 from torch import nn
 from transformers import PretrainedConfig
 from vllm.attention import Attention, AttentionMetadata
@@ -397,20 +396,17 @@ def forward(
             hidden_states_or_q_c = hidden_states
         if self.torchair_graph_enabled:
             forward_kwargs = {}
-            if envs.VLLM_USE_V1:
-                output_shape = hidden_states.shape
-                output = torch.empty(output_shape,
-                                     dtype=hidden_states_or_q_c.dtype,
-                                     device=hidden_states_or_q_c.device)
-                forward_kwargs['output'] = output
-
+            output_shape = hidden_states.shape
+            output = torch.empty(output_shape,
+                                 dtype=hidden_states_or_q_c.dtype,
+                                 device=hidden_states_or_q_c.device)
+            forward_kwargs['output'] = output
             output = self.mla_attn.impl.forward(self.mla_attn,
                                                 hidden_states_or_q_c,
                                                 hidden_states, None, kv_cache,
                                                 attn_metadata,
                                                 **forward_kwargs)
-            if envs.VLLM_USE_V1:
-                output = output.view(-1, output_shape[-1])
+            output = output.view(-1, output_shape[-1])
             return output
         else:
             kv_c, k_pe = self.kv_a_proj_with_mqa(hidden_states)[0].split(
@@ -885,7 +881,7 @@ def forward(
     def can_run_ms(self):
         attn_metadata = get_forward_context().attn_metadata
         # support mla attention and V1 engine at present
-        if not self.use_mla or not envs.VLLM_USE_V1:
+        if not self.use_mla:
             return False
         # enable prefill overlap
         if attn_metadata is None or attn_metadata.num_prefills == 0:
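The forward change above (mirrored in deepseek_v2.py below) makes the V1 convention unconditional: the caller pre-allocates the attention output buffer, hands it to the MLA implementation through `forward_kwargs`, and flattens it to `(num_tokens, hidden_size)` afterwards. A standalone sketch of just that call pattern; `mla_attn`, `kv_cache`, and `attn_metadata` stand in for the objects from the surrounding module:

import torch


def run_mla_v1(mla_attn, hidden_states, hidden_states_or_q_c, kv_cache, attn_metadata):
    # Pre-allocate the buffer the V1 MLA impl writes into.
    output_shape = hidden_states.shape
    output = torch.empty(output_shape,
                         dtype=hidden_states_or_q_c.dtype,
                         device=hidden_states_or_q_c.device)
    output = mla_attn.impl.forward(mla_attn, hidden_states_or_q_c,
                                   hidden_states, None, kv_cache,
                                   attn_metadata, output=output)
    # Flatten to (num_tokens, hidden_size) before handing back to the model.
    return output.view(-1, output_shape[-1])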

vllm_ascend/models/deepseek_v2.py

Lines changed: 7 additions & 11 deletions
@@ -29,7 +29,6 @@
 
 import torch
 import torch_npu
-import vllm.envs as envs
 from torch import nn
 from transformers import PretrainedConfig
 from vllm.attention import Attention, AttentionMetadata
@@ -579,20 +578,17 @@ def forward(
         else:
             hidden_states_or_q_c = hidden_states
         if self.torchair_graph_enabled:
-            if envs.VLLM_USE_V1:
-                output_shape = hidden_states.shape
-                output = torch.empty(output_shape,
-                                     dtype=hidden_states_or_q_c.dtype,
-                                     device=hidden_states_or_q_c.device)
-                forward_kwargs['output'] = output
-
+            output_shape = hidden_states.shape
+            output = torch.empty(output_shape,
+                                 dtype=hidden_states_or_q_c.dtype,
+                                 device=hidden_states_or_q_c.device)
+            forward_kwargs['output'] = output
             output = self.mla_attn.impl.forward(self.mla_attn,
                                                 hidden_states_or_q_c,
                                                 hidden_states, None, kv_cache,
                                                 attn_metadata,
                                                 **forward_kwargs)
-            if envs.VLLM_USE_V1:
-                output = output.view(-1, output_shape[-1])
+            output = output.view(-1, output_shape[-1])
             return output
         else:
             kv_c, k_pe = self.kv_a_proj_with_mqa(hidden_states)[0].split(
@@ -660,7 +656,7 @@ def __init__(
                 prefix=f"{prefix}.mlp",
             )
             self.mla_moe_communication = ascend_config.torchair_graph_config.enable_multistream_moe \
-                and model_config.use_mla and envs.VLLM_USE_V1 and self.tp_size > 1
+                and model_config.use_mla and self.tp_size > 1
         else:
             self.mlp = CustomDeepseekV2MLP(
                 hidden_size=config.hidden_size,
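With the envs check dropped, multistream MoE for MLA now only requires torchair graph mode plus tensor parallelism. A hedged configuration sketch; the model name is illustrative and the nesting of `enable_multistream_moe` under `torchair_graph_config` in `additional_config` is an assumption based on the ascend_config fields referenced above:

from vllm import LLM

llm = LLM(
    model="deepseek-ai/DeepSeek-V2-Lite",
    tensor_parallel_size=2,  # tp_size > 1 is required for mla_moe_communication
    additional_config={
        "torchair_graph_config": {
            "enabled": True,
            "enable_multistream_moe": True,
        },
    },
)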
