Remove RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES and add lazy initialization for vllm_ascend_C

zhuo97 · zhuo97 · commit f085e488996d · 2025-05-16T15:33:06.000+08:00
diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py
@@ -22,11 +22,12 @@
 from vllm.model_executor.layers.rotary_embedding import (
     DeepseekScalingRotaryEmbedding, RotaryEmbedding)
 
-from vllm_ascend.platform import CUSTOM_OP_ENABLED
+from vllm_ascend.utils import try_register_lib
 
 
 def custom_rotary_embedding_enabled(query, neox_style, head_size):
-    return query.dtype == torch.float16 and neox_style and head_size % 32 == 0 and CUSTOM_OP_ENABLED
+    try_register_lib("vllm_ascend.vllm_ascend_C")
+    return query.dtype == torch.float16 and neox_style and head_size % 32 == 0
 
 
 def rope_forward_oot(
diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
@@ -15,8 +15,6 @@
 # This file is a part of the vllm-ascend project.
 #
 
-import logging
-import os
 from typing import TYPE_CHECKING, Optional, Tuple
 
 import torch
@@ -27,18 +25,6 @@
 
 from vllm_ascend.utils import update_aclgraph_sizes
 
-CUSTOM_OP_ENABLED = False
-try:
-    # register custom ops into torch_library here
-    import vllm_ascend.vllm_ascend_C  # type: ignore  # noqa: F401
-
-except ImportError:
-    logging.warning(
-        "Warning: Failed to register custom ops, all custom ops will be disabled"
-    )
-else:
-    CUSTOM_OP_ENABLED = True
-
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
     from vllm.utils import FlexibleArgumentParser
@@ -47,8 +33,6 @@
     VllmConfig = None
     FlexibleArgumentParser = None
 
-os.environ["RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES"] = "1"
-
 
 class NPUPlatform(Platform):