
Fix the device error when using ray as vllm-ascend backend #1234


Merged · 1 commit · Jun 17, 2025
3 changes: 0 additions & 3 deletions examples/offline_multi_step_custom_ops.py
@@ -19,9 +19,6 @@
 
 from vllm import LLM, SamplingParams
 
-import vllm_ascend.platform as pf
-
-pf.CUSTOM_OP_ENABLED = True  # set True for custom Ops of Multi-Step.
 prompts = [
     "Hello, my name is",
     "The president of the United States is",
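As context for why the example no longer needs these lines: the module-level opt-in flag is replaced by the lazy helper this PR introduces. A minimal sketch of the updated usage, assuming only the new `vllm_ascend.utils` API shown in this diff (the model name is illustrative, not from this PR):

```python
from vllm import LLM, SamplingParams

from vllm_ascend.utils import enable_custom_op

# Optional: trigger the lazy probe explicitly. Call sites inside vllm-ascend
# now invoke enable_custom_op() themselves, so example scripts can omit this.
enable_custom_op()

prompts = [
    "Hello, my name is",
    "The president of the United States is",
]
llm = LLM(model="facebook/opt-125m")  # illustrative model choice
outputs = llm.generate(prompts, SamplingParams(temperature=0.8))
```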
4 changes: 3 additions & 1 deletion tests/singlecard/ops/test_rotary_embedding.py
@@ -10,7 +10,9 @@
 import torch
 import torch.nn as nn
 
-import vllm_ascend.platform  # noqa: F401
+from vllm_ascend.utils import enable_custom_op
+
+enable_custom_op()
 
 # Only Neox style true scenario is supported for now
 IS_NEOX_STYLE = [True]
4 changes: 2 additions & 2 deletions vllm_ascend/attention/attention.py
@@ -36,7 +36,7 @@
 
 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ops.cache import concat_and_cache_mla
-from vllm_ascend.platform import CUSTOM_OP_ENABLED
+from vllm_ascend.utils import enable_custom_op
 from vllm_ascend.worker.model_runner import (
     ModelInputForNPUBuilder, ModelInputForNPUWithSamplingMetadata)
 
@@ -460,7 +460,7 @@ def advance_step(self,
         for i in range(num_queries):
             self.seq_lens[i] += 1
         self.max_decode_seq_len = max(self.seq_lens)
-        if CUSTOM_OP_ENABLED:
+        if enable_custom_op():
             #advance a step on NPU for existing inputs for a multi-step runner if custom ops is enabled
             torch.ops._C.advance_step_flashattn_ascendc(
                 num_seqs=num_seqs,
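The substantive change in this hunk is that a module-level constant becomes a function call, so the probe for the compiled extension runs when `advance_step` first executes in the worker process rather than when the module is imported. A simplified sketch of the call-site pattern, not the literal attention.py method:

```python
from vllm_ascend.utils import enable_custom_op

def advance_step_sketch(seq_lens: list[int]) -> int:
    """Illustrative only; mirrors the shape of the advance_step hunk above."""
    for i in range(len(seq_lens)):
        seq_lens[i] += 1  # host-side bookkeeping always runs
    if enable_custom_op():
        # The real method additionally advances NPU-side tensors here via
        # torch.ops._C.advance_step_flashattn_ascendc(num_seqs=..., ...)
        pass
    return max(seq_lens)  # the new max_decode_seq_len
```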
5 changes: 3 additions & 2 deletions vllm_ascend/ops/rotary_embedding.py
@@ -22,11 +22,12 @@
 from vllm.model_executor.layers.rotary_embedding import (
     DeepseekScalingRotaryEmbedding, RotaryEmbedding)
 
-from vllm_ascend.platform import CUSTOM_OP_ENABLED
+from vllm_ascend.utils import enable_custom_op
 
 
 def custom_rotary_embedding_enabled(query, neox_style, head_size):
-    return query.dtype == torch.float16 and neox_style and head_size % 32 == 0 and CUSTOM_OP_ENABLED
+    return query.dtype == torch.float16 and neox_style and head_size % 32 == 0 and enable_custom_op(
+    )
 
 
 def rope_forward_oot(
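To make the gate concrete: the custom rope kernel is considered only for float16 queries, Neox-style embedding, and head sizes divisible by 32, and even then only if the compiled extension registered. A quick sketch (tensor shapes are illustrative):

```python
import torch

from vllm_ascend.ops.rotary_embedding import custom_rotary_embedding_enabled

q_fp16 = torch.empty(2, 8, 128, dtype=torch.float16)
q_bf16 = torch.empty(2, 8, 128, dtype=torch.bfloat16)

# True only if vllm_ascend_C also imported successfully on this machine:
print(custom_rotary_embedding_enabled(q_fp16, neox_style=True, head_size=128))
# Always False: the dtype check short-circuits before the extension is probed.
print(custom_rotary_embedding_enabled(q_bf16, neox_style=True, head_size=128))
```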
12 changes: 0 additions & 12 deletions vllm_ascend/platform.py
@@ -16,7 +16,6 @@
 #
 
 import gc
-import logging
 import os
 from datetime import timedelta
 from typing import TYPE_CHECKING, Optional, Tuple
@@ -32,16 +31,6 @@
 from vllm_ascend.ascend_config import check_ascend_config, init_ascend_config
 from vllm_ascend.utils import ASCEND_QUATIZATION_METHOD, update_aclgraph_sizes
 
-CUSTOM_OP_ENABLED = False
-try:
-    # register custom ops into torch_library here
-    import vllm_ascend.vllm_ascend_C  # type: ignore  # noqa: F401
-    CUSTOM_OP_ENABLED = True
-except ImportError as e:
-    logging.warning(
-        "Failed to import 'vllm_ascend.vllm_ascend_C': %s. All custom ops will be disabled. ",
-        e)
-
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
     from vllm.utils import FlexibleArgumentParser
@@ -50,7 +39,6 @@
     VllmConfig = None
     FlexibleArgumentParser = None
 
-os.environ["RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES"] = "1"
 os.environ["ACL_OP_INIT_MODE"] = ascend_envs.VLLM_ASCEND_ACL_OP_INIT_MODE
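These deletions are where the device error originated: importing vllm_ascend_C at module load initialized CANN's RTS component early, before a Ray worker could set ASCEND_RT_VISIBLE_DEVICES (per the new helper's docstring; the deleted RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES line was a workaround for the same ordering problem). A sketch of the ordering the lazy probe enables, simplified and not actual vllm-ascend worker code:

```python
import os

# A Ray worker (or the user) can now pin device visibility first...
os.environ["ASCEND_RT_VISIBLE_DEVICES"] = "2"

# ...because the extension (and hence RTS) initializes only when the first
# call site asks for it, after the device environment is final.
from vllm_ascend.utils import enable_custom_op

enable_custom_op()
# torch.npu.set_device(0)  # would now resolve against the restricted list
```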
27 changes: 27 additions & 0 deletions vllm_ascend/utils.py
@@ -44,6 +44,8 @@
 
 ASCEND_QUATIZATION_METHOD = "ascend"
 
+CUSTOM_OP_ENABLED = None
+
 
 def try_register_lib(lib_name: str, lib_info: str = ""):
     import importlib
@@ -58,6 +60,31 @@ def try_register_lib(lib_name: str, lib_info: str = ""):
         pass
 
 
+def enable_custom_op():
+    """
+    Enable lazy init for vllm_ascend_C to avoid early initialization of CANN's RTS component.
+    Ensure that ASCEND_RT_VISIBLE_DEVICES can be dynamically modified before torch.npu.set_device().
+    """
+    global CUSTOM_OP_ENABLED
+
+    if CUSTOM_OP_ENABLED is not None:
+        return CUSTOM_OP_ENABLED
+
+    else:
+        try:
+            # register custom ops into torch_library here
+            import vllm_ascend.vllm_ascend_C  # type: ignore  # noqa: F401
[Collaborator review comment on the import above] The import of vllm_ascend_C seems to appear in more places than just this one; can you locate them and wrap them too?
+            CUSTOM_OP_ENABLED = True
+
+        except ImportError:
+            CUSTOM_OP_ENABLED = False
+            logger.warning(
+                "Warning: Failed to register custom ops, all custom ops will be disabled"
+            )
+
+    return CUSTOM_OP_ENABLED
+
+
 def find_hccl_library() -> str:
     """
     We either use the library file specified by the `HCCL_SO_PATH`
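A small usage sketch of the helper's memoization, assuming only what the diff above shows: the import is attempted once and the result cached in CUSTOM_OP_ENABLED, which keeps the guard cheap on hot paths such as advance_step:

```python
from vllm_ascend.utils import enable_custom_op

first = enable_custom_op()   # attempts `import vllm_ascend.vllm_ascend_C`
second = enable_custom_op()  # returns the cached result; no re-import
assert first == second

# On a machine without the compiled extension, both calls return False and a
# single warning is logged; with the extension built, both return True.
```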