
Commit b917361

[MISC] Clean up torch_npu (#688)
torch_npu 2.5.1 now supports autoload. This patch: 1. removes the now-unneeded torch_npu imports; 2. replaces `torch_npu.npu` with `torch.npu`.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent 0329fad commit b917361
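
For context, the autoload mentioned above means that with torch_npu 2.5.1+ installed, `import torch` alone registers the NPU backend, so the `torch.npu` namespace used throughout this change works without an explicit `import torch_npu`. A minimal sketch (not part of the patch; it simply reuses the torch.npu calls the diff switches to):

import torch  # torch_npu 2.5.1+ is auto-loaded on import; no explicit `import torch_npu`

free_bytes, total_bytes = torch.npu.mem_get_info()  # same call test_camem.py now uses
copy_stream = torch.npu.Stream()                    # same call patch_metrics.py now uses
print(f"free NPU memory: {free_bytes} / {total_bytes} bytes")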

File tree

15 files changed: +18 −47 lines


examples/dp_offline/data_parallel.py

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@ def main():
         for i in range(local_rank * tp_size, (local_rank + 1) * tp_size))
 
     import torch
-    import torch_npu  # noqa
     from vllm import LLM, SamplingParams
     from vllm.distributed.parallel_state import (
         destroy_distributed_environment, destroy_model_parallel)

tests/multicard/test_pyhccl_distributed.py

Lines changed: 0 additions & 1 deletion
@@ -20,7 +20,6 @@
 import os
 
 import torch
-import torch_npu  # noqa: F401
 from vllm.distributed.parallel_state import (get_world_group,
                                              init_distributed_environment)
 from vllm.utils import update_environment_variables

tests/ops/test_rotary_embedding.py

Lines changed: 0 additions & 1 deletion
@@ -9,7 +9,6 @@
 import pytest
 import torch
 import torch.nn as nn
-import torch_npu  # noqa: F401
 
 import vllm_ascend.platform  # noqa: F401
 
tests/singlecard/test_camem.py

Lines changed: 4 additions & 9 deletions
@@ -25,11 +25,6 @@
 from tests.utils import fork_new_process_for_each_test
 from vllm_ascend.device_allocator.camem import CaMemAllocator
 
-try:
-    import torch_npu  # noqa: F401
-except ImportError:
-    print("Failed to import torch_npu.")
-
 
 @fork_new_process_for_each_test
 def test_basic_camem():
@@ -53,9 +48,9 @@ def test_basic_camem():
     output = x + y + z
     assert torch.allclose(output, torch.ones_like(output) * 3)
 
-    free_bytes = torch_npu.npu.mem_get_info()[0]
+    free_bytes = torch.npu.mem_get_info()[0]
     allocator.sleep()
-    free_bytes_after_sleep = torch_npu.npu.mem_get_info()[0]
+    free_bytes_after_sleep = torch.npu.mem_get_info()[0]
     assert free_bytes_after_sleep > free_bytes
     allocator.wake_up()
 
@@ -67,7 +62,7 @@ def test_basic_camem():
 @fork_new_process_for_each_test
 def test_end_to_end():
     os.environ["VLLM_USE_V1"] = "0"
-    free, total = torch_npu.npu.mem_get_info()
+    free, total = torch.npu.mem_get_info()
     used_bytes_baseline = total - free  # in case other process is running
     llm = LLM("Qwen/Qwen2.5-0.5B-Instruct", enable_sleep_mode=True)
     prompt = "How are you?"
@@ -79,7 +74,7 @@ def test_end_to_end():
     # test sleep level 1 here.
     llm.sleep(level=1)
 
-    free_gpu_bytes_after_sleep, total = torch_npu.npu.mem_get_info()
+    free_gpu_bytes_after_sleep, total = torch.npu.mem_get_info()
     used_bytes = total - free_gpu_bytes_after_sleep - used_bytes_baseline
     # now the memory usage should be less than the model weights
     # (0.5B model, 1GiB weights)

tests/singlecard/test_pyhccl.py

Lines changed: 0 additions & 1 deletion
@@ -17,7 +17,6 @@
 # limitations under the License.
 #
 import torch
-import torch_npu  # noqa: F401
 
 from vllm_ascend.distributed.device_communicators.pyhccl_wrapper import \
     HCCLLibrary

vllm_ascend/attention/attention.py

Lines changed: 2 additions & 7 deletions
@@ -20,14 +20,9 @@
 
 import numpy as np
 import torch
-from torch.nn.functional import scaled_dot_product_attention
-
-try:
-    import torch_npu  # noqa: F401
-except ImportError:
-    print("Failed to import torch_npu.")
-
+import torch_npu
 import torchair._contrib.custom_torch_ops  # type: ignore  # noqa: F401
+from torch.nn.functional import scaled_dot_product_attention
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionLayer,
                                               AttentionMetadata, AttentionType,

vllm_ascend/device_allocator/camem.py

Lines changed: 5 additions & 11 deletions
@@ -24,12 +24,6 @@
 import torch
 from acl.rt import memcpy  # type: ignore  # noqa: F401
 from vllm.logger import logger
-
-try:
-    import torch_npu  # noqa: F401
-except ImportError:
-    print("Failed to import torch_npu.")
-
 from vllm.utils import is_pin_memory_available
 
 
@@ -95,10 +89,10 @@ def unmap_and_release(allocation_handle: HandleType) -> None:
 def get_pluggable_allocator(
     python_malloc_fn: Callable[[tuple[int, int, int, int]], None],
     python_free_func: Callable[[int], tuple[int, int, int, int]]
-) -> torch_npu.npu.memory.NPUPluggableAllocator:
+) -> torch.npu.memory.NPUPluggableAllocator:
     init_module(python_malloc_fn, python_free_func)
-    new_alloc = torch_npu.npu.memory.NPUPluggableAllocator(
-        lib_name, 'my_malloc', 'my_free')
+    new_alloc = torch.npu.memory.NPUPluggableAllocator(lib_name, 'my_malloc',
+                                                       'my_free')
     return new_alloc
 
 
@@ -107,8 +101,8 @@ def use_memory_pool_with_allocator(
     python_malloc_fn: Callable[[tuple[int, int, int, int]], None],
     python_free_func: Callable[[int], tuple[int, int, int, int]]):
     new_alloc = get_pluggable_allocator(python_malloc_fn, python_free_func)
-    mem_pool = torch_npu.npu.memory.MemPool(new_alloc._allocator)
-    with torch_npu.npu.memory.use_mem_pool(mem_pool):
+    mem_pool = torch.npu.memory.MemPool(new_alloc._allocator)
+    with torch.npu.memory.use_mem_pool(mem_pool):
         yield mem_pool, new_alloc
 
 
vllm_ascend/distributed/device_communicators/pyhccl.py

Lines changed: 0 additions & 1 deletion
@@ -19,7 +19,6 @@
 
 import torch
 import torch.distributed as dist
-import torch_npu  # noqa: F401
 from torch.distributed import ProcessGroup, ReduceOp
 from vllm.distributed.utils import StatelessProcessGroup
 from vllm.logger import logger

vllm_ascend/ops/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -16,7 +16,6 @@
 #
 
 import torch
-import torch_npu  # noqa: F401
 
 import vllm_ascend.ops.activation  # noqa
 import vllm_ascend.ops.common_fused_moe  # noqa
@@ -34,7 +33,7 @@ def __init__(self, name=""):
 
 
 def register_dummy_fusion_op() -> None:
-    torch.cuda.CUDAGraph = torch_npu.npu.NPUGraph
+    torch.cuda.CUDAGraph = torch.npu.NPUGraph
     torch.ops._C.rms_norm = dummyFusionOp(name="rms_norm")
     torch.ops._C.fused_add_rms_norm = dummyFusionOp(name="fused_add_rms_norm")
     torch.ops._C.static_scaled_fp8_quant = dummyFusionOp(

vllm_ascend/patch/worker/patch_0_8_4/patch_metrics.py

Lines changed: 1 addition & 2 deletions
@@ -18,7 +18,6 @@
 from typing import Callable, Optional, Union
 
 import torch
-import torch_npu
 from vllm.spec_decode.metrics import (AsyncMetricsCollector,
                                       SpecDecodeWorkerMetrics)
 
@@ -36,7 +35,7 @@ def init_tensors(self,
     if isinstance(device_type, torch.device):
         device_type = device_type.type
     if device_type == 'npu':
-        self._copy_stream = torch_npu.npu.Stream()
+        self._copy_stream = torch.npu.Stream()
 
 
 def maybe_collect_rejsample_metrics(
