
Commit 5968dff

[Build] Add build info (#1386)
Add a static _build_info.py file that records the SOC version and sleep-mode availability. This keeps the code clean and makes error messages friendlier for users. This PR also adds unit tests for vllm_ascend/utils.py, plus a base test class for all unit tests in tests/ut/base.py.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent c563a08 commit 5968dff
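
For context, the generated file is tiny: after a build it contains just two module-level constants, exactly the ones written by the setup.py hunk below. A sketch with placeholder values (the SOC version shown is illustrative, not taken from this commit):

    # vllm_ascend/_build_info.py -- auto-generated by setup.py at build time.
    # "Ascend910B1" is a placeholder value for illustration only.
    # Auto-generated file
    __soc_version__ = 'Ascend910B1'
    __sleep_mode_enabled__ = True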

File tree

11 files changed: +388 −66 lines

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -196,3 +196,5 @@ kernel_meta/
 
 # version file generated by setuptools-scm
 /vllm_ascend/_version.py
+# build info file generated by setup.py
+/vllm_ascend/_build_info.py

setup.py

Lines changed: 30 additions & 1 deletion
@@ -27,6 +27,7 @@
 
 from setuptools import Extension, find_packages, setup
 from setuptools.command.build_ext import build_ext
+from setuptools.command.build_py import build_py
 from setuptools.command.develop import develop
 from setuptools.command.install import install
 from setuptools_scm import get_version
@@ -78,6 +79,30 @@ def __init__(self,
         self.cmake_lists_dir = os.path.abspath(cmake_lists_dir)
 
 
+class custom_build_info(build_py):
+
+    def run(self):
+        soc_version = envs.SOC_VERSION
+        if not soc_version:
+            raise ValueError(
+                "SOC version is not set. Please set SOC_VERSION environment variable."
+            )
+        if "310" in soc_version and not envs.COMPILE_CUSTOM_KERNELS:
+            raise ValueError(
+                "SOC version 310 only supports custom kernels. Please set COMPILE_CUSTOM_KERNELS=1 to enable custom kernels."
+            )
+
+        package_dir = os.path.join(ROOT_DIR, "vllm_ascend", "_build_info.py")
+        with open(package_dir, "w+") as f:
+            f.write('# Auto-generated file\n')
+            f.write(f"__soc_version__ = '{soc_version}'\n")
+            f.write(
+                f"__sleep_mode_enabled__ = {envs.COMPILE_CUSTOM_KERNELS}\n")
+        logging.info(
+            f"Generated _build_info.py with SOC version: {soc_version}")
+        super().run()
+
+
 class cmake_build_ext(build_ext):
     # A dict of extension directories that have been configured.
     did_config: Dict[str, bool] = {}
@@ -326,7 +351,11 @@ def _read_requirements(filename: str) -> List[str]:
     return requirements
 
 
-cmdclass = {"build_ext": cmake_build_ext, "install": custom_install}
+cmdclass = {
+    "build_py": custom_build_info,
+    "build_ext": cmake_build_ext,
+    "install": custom_install
+}
 
 setup(
     name="vllm_ascend",
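
The hunk above relies on envs.SOC_VERSION and envs.COMPILE_CUSTOM_KERNELS. A minimal sketch of what such accessors amount to, assuming plain environment lookups (the authoritative definitions live in vllm_ascend/envs.py and may parse these differently, including the default):

    import os

    # Assumed shape of the env accessors used by custom_build_info above;
    # vllm_ascend/envs.py is the source of truth and may differ.
    SOC_VERSION = os.getenv("SOC_VERSION", "")
    COMPILE_CUSTOM_KERNELS = bool(int(os.getenv("COMPILE_CUSTOM_KERNELS", "0")))

Because custom_build_info is registered under the standard build_py key, any build path that runs build_py (for example pip install or python -m build) regenerates _build_info.py before packaging.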

tests/ut/base.py

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+import unittest
+
+from vllm_ascend.utils import adapt_patch
+
+
+class TestBase(unittest.TestCase):
+
+    def setUp(self):
+        # adapt patch by default.
+        adapt_patch(True)
+        adapt_patch()
+        super().setUp()
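
A minimal usage sketch, assuming a hypothetical new test module (the next file in this commit is a real example of the same pattern): subclasses get the vllm patches applied in setUp() before each test body runs.

    from tests.ut.base import TestBase

    class TestExample(TestBase):  # hypothetical test, for illustration only

        def test_runs_after_patching(self):
            # TestBase.setUp() has already called adapt_patch(), so vllm
            # internals are patched by the time this body executes.
            self.assertTrue(True)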
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+from tests.ut.base import TestBase
+
+
+class TestPatchDistributed(TestBase):
+
+    def test_GroupCoordinator_patched(self):
+        from vllm.distributed.parallel_state import GroupCoordinator
+
+        from vllm_ascend.patch.worker.patch_common.patch_distributed import \
+            GroupCoordinatorPatch
+
+        self.assertIs(GroupCoordinator, GroupCoordinatorPatch)
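
The assertIs check can only pass if importing the patch module rebinds the class inside vllm itself. A hedged sketch of that monkey-patch pattern (the real code in vllm_ascend/patch/worker/patch_common/patch_distributed.py is not shown in this diff and will differ):

    import vllm.distributed.parallel_state as parallel_state

    class GroupCoordinatorPatch(parallel_state.GroupCoordinator):
        # Ascend-specific overrides would live here.
        pass

    # Rebinding the module attribute makes later lookups of
    # parallel_state.GroupCoordinator resolve to the patched class,
    # which is what the assertIs check above verifies.
    parallel_state.GroupCoordinator = GroupCoordinatorPatch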

tests/ut/test_utils.py

Lines changed: 251 additions & 0 deletions
@@ -0,0 +1,251 @@
+import math
+import os
+import unittest
+from threading import Lock
+from unittest import mock
+
+import torch
+from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig,
+                         VllmConfig)
+
+from vllm_ascend import utils
+
+
+class TestUtils(unittest.TestCase):
+
+    def test_is_310p(self):
+        utils._IS_310P = None
+        with mock.patch("vllm_ascend._build_info.__soc_version__",
+                        "Ascend310P3"):
+            self.assertTrue(utils.is_310p())
+        utils._IS_310P = None
+        with mock.patch("vllm_ascend._build_info.__soc_version__",
+                        "Ascend910P1"):
+            self.assertFalse(utils.is_310p())
+
+    def test_sleep_mode_enabled(self):
+        utils._SLEEP_MODE_ENABLED = None
+        with mock.patch("vllm_ascend._build_info.__sleep_mode_enabled__",
+                        True):
+            self.assertTrue(utils.sleep_mode_enabled())
+        utils._SLEEP_MODE_ENABLED = None
+        with mock.patch("vllm_ascend._build_info.__sleep_mode_enabled__",
+                        False):
+            self.assertFalse(utils.sleep_mode_enabled())
+
+    def test_nd_to_nz_2d(self):
+        # can be divided by 16
+        input_tensor = torch.randn(32, 64)
+        output = utils.nd_to_nz_2d(input_tensor)
+        self.assertEqual(output.shape[0], 1)
+        self.assertEqual(output.shape[1], 64 // 16)
+        self.assertEqual(output.shape[2], 32)
+        self.assertEqual(output.shape[3], 16)
+
+        # cannot be divided by 16
+        input_tensor = torch.randn(30, 62)
+        output = utils.nd_to_nz_2d(input_tensor)
+        self.assertEqual(output.shape[0], 1)
+        self.assertEqual(output.shape[1], math.ceil(62 / 16))
+        self.assertEqual(output.shape[2], 32)
+        self.assertEqual(output.shape[3], 16)
+
+        # pad to 16
+        input_tensor = torch.randn(8, 12)
+        output = utils.nd_to_nz_2d(input_tensor)
+        self.assertEqual(output.shape[0], 1)
+        self.assertEqual(output.shape[1], 1)  # 12->16, 16//16=1
+        self.assertEqual(output.shape[2], 16)  # 8->16
+        self.assertEqual(output.shape[3], 16)
+
+        # check if the output is contiguous
+        input_tensor = torch.randn(32, 64)
+        output = utils.nd_to_nz_2d(input_tensor)
+        self.assertTrue(output.is_contiguous())
+
+        # check if the output values are preserved
+        input_tensor = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]])
+        output = utils.nd_to_nz_2d(input_tensor)
+        expected = torch.tensor(
+            [[[[1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]]])
+        self.assertTrue(torch.allclose(output, expected))
+
+    def test_aligned_16(self):
+        # align to 16
+        input_tensor = torch.randn(15, 64)
+        output_tensor = utils.aligned_16(input_tensor)
+        self.assertEqual(output_tensor.shape[0], 16)
+
+        # align to 16
+        input_tensor = torch.randn(16, 64)
+        output_tensor = utils.aligned_16(input_tensor)
+        self.assertEqual(output_tensor.shape[0], 16)
+        self.assertTrue(torch.equal(input_tensor, output_tensor))
+
+        # align to 32
+        input_tensor = torch.randn(17, 64)
+        output_tensor = utils.aligned_16(input_tensor)
+        self.assertEqual(output_tensor.shape[0], 32)
+
+    @mock.patch('importlib.util.find_spec')
+    @mock.patch('importlib.import_module')
+    def test_try_register_lib(self, mock_import_module, mock_find_spec):
+        # import OK
+        mock_find_spec.return_value = mock.MagicMock()
+        mock_import_module.return_value = mock.MagicMock()
+        lib_name = "existing_lib"
+        lib_info = "Library found and imported successfully"
+        utils.try_register_lib(lib_name, lib_info)
+        mock_find_spec.assert_called_once_with(lib_name)
+        mock_import_module.assert_called_once_with(lib_name)
+
+        # Can't find lib
+        mock_find_spec.return_value = None
+        lib_name = "non_existing_lib"
+        utils.try_register_lib(lib_name)
+        self.assertEqual(2, mock_find_spec.call_count)
+        self.assertEqual(1, mock_import_module.call_count)
+
+        # import error
+        mock_find_spec.return_value = mock.MagicMock()
+        mock_import_module.side_effect = ImportError("import error")
+        lib_name = "error_lib"
+        utils.try_register_lib(lib_name)
+        self.assertEqual(3, mock_find_spec.call_count)
+        self.assertEqual(2, mock_import_module.call_count)
+
+    def test_enable_custom_op(self):
+        result = utils.enable_custom_op()
+        self.assertTrue(result)
+
+        utils._CUSTOM_OP_ENABLED = None
+
+        with mock.patch('builtins.__import__') as mock_import_module:
+            mock_import_module.side_effect = ImportError("import error")
+            self.assertFalse(utils.enable_custom_op())
+
+    def test_find_hccl_library(self):
+        with mock.patch.dict(os.environ,
+                             {"HCCL_SO_PATH": "/path/to/hccl/libhccl.so"}):
+            self.assertEqual(utils.find_hccl_library(),
+                             "/path/to/hccl/libhccl.so")
+        with mock.patch("torch.version.cann", None):
+            self.assertRaises(ValueError, utils.find_hccl_library)
+        with mock.patch("torch.version.cann", "Ascend910"):
+            self.assertEqual(utils.find_hccl_library(), "libhccl.so")
+
+    def test_current_stream(self):
+        with mock.patch("torch.npu.current_stream") as mock_current_stream:
+            self.assertEqual(utils.current_stream(), mock_current_stream())
+
+    def test_vllm_version_is(self):
+        with mock.patch.dict(os.environ, {"VLLM_VERSION": "1.0.0"}):
+            with mock.patch("vllm.__version__", "1.0.0"):
+                self.assertTrue(utils.vllm_version_is("1.0.0"))
+                self.assertFalse(utils.vllm_version_is("2.0.0"))
+            with mock.patch("vllm.__version__", "2.0.0"):
+                self.assertTrue(utils.vllm_version_is("1.0.0"))
+                self.assertFalse(utils.vllm_version_is("2.0.0"))
+        with mock.patch("vllm.__version__", "1.0.0"):
+            self.assertTrue(utils.vllm_version_is("1.0.0"))
+            self.assertFalse(utils.vllm_version_is("2.0.0"))
+        with mock.patch("vllm.__version__", "2.0.0"):
+            self.assertTrue(utils.vllm_version_is("2.0.0"))
+            self.assertFalse(utils.vllm_version_is("1.0.0"))
+
+    def test_update_aclgraph_sizes(self):
+        # max_num_batch_sizes < len(original_sizes)
+        test_compilation_config = CompilationConfig(
+            cudagraph_capture_sizes=[i for i in range(150)])
+        model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
+        test_model_config = ModelConfig(model=model_path, enforce_eager=True)
+        test_parallel_config = ParallelConfig()
+        test_vllm_config = VllmConfig(
+            model_config=test_model_config,
+            compilation_config=test_compilation_config,
+            parallel_config=test_parallel_config,
+        )
+        utils.update_aclgraph_sizes(test_vllm_config)
+        self.assertEqual(
+            147,
+            len(test_vllm_config.compilation_config.cudagraph_capture_sizes))
+        # max_num_batch_sizes >= len(original_sizes)
+        test_compilation_config = CompilationConfig(
+            cudagraph_capture_sizes=[1, 2, 3])
+        test_vllm_config = VllmConfig(
+            model_config=test_model_config,
+            compilation_config=test_compilation_config,
+            parallel_config=test_parallel_config,
+        )
+        utils.update_aclgraph_sizes(test_vllm_config)
+        self.assertEqual(
+            3,
+            len(test_vllm_config.compilation_config.cudagraph_capture_sizes))
+
+
+class TestProfileExecuteDuration(unittest.TestCase):
+
+    def setUp(self):
+        utils.ProfileExecuteDuration._instance = None
+        utils.ProfileExecuteDuration._observations = []
+        utils.ProfileExecuteDuration._lock = Lock()
+
+    def test_singleton_creation(self):
+        instance1 = utils.ProfileExecuteDuration()
+        self.assertIsNotNone(instance1)
+        self.assertIs(instance1, utils.ProfileExecuteDuration._instance)
+
+        instance2 = utils.ProfileExecuteDuration()
+        self.assertIs(instance1, instance2)
+
+    def test_thread_safety(self):
+        from threading import Thread
+
+        instances = []
+
+        def create_instance():
+            instances.append(utils.ProfileExecuteDuration())
+
+        threads = [Thread(target=create_instance) for _ in range(10)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+        first_instance = instances[0]
+        for instance in instances[1:]:
+            self.assertIs(first_instance, instance)
+
+    def test_atexit_registration(self):
+        with mock.patch('atexit.register') as mock_register:
+            instance = utils.ProfileExecuteDuration()
+            mock_register.assert_called_once_with(instance.destroy)
+
+    def test_lock_usage(self):
+        original_lock = utils.ProfileExecuteDuration._lock
+
+        with mock.patch.object(utils.ProfileExecuteDuration,
+                               '_lock',
+                               wraps=original_lock) as mock_lock:
+            utils.ProfileExecuteDuration()
+            mock_lock.__enter__.assert_called()
+            mock_lock.__exit__.assert_called()
+
+    def test_observations_initialization(self):
+        instance = utils.ProfileExecuteDuration()
+        self.assertEqual(instance._observations, [])
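
test_is_310p and test_sleep_mode_enabled reset module-level caches (utils._IS_310P, utils._SLEEP_MODE_ENABLED) before mocking _build_info, which implies a lazily cached accessor along these lines (a sketch inferred from the tests, not the committed implementation):

    _IS_310P = None

    def is_310p() -> bool:
        global _IS_310P
        if _IS_310P is None:
            # _build_info.py is generated by setup.py at build time.
            from vllm_ascend import _build_info
            _IS_310P = "310p" in _build_info.__soc_version__.lower()
        return _IS_310P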

vllm_ascend/device_allocator/camem.py

Lines changed: 0 additions & 1 deletion
@@ -138,7 +138,6 @@ def get_instance() -> "CaMemAllocator":
         We cannot call the constructor directly.
         Call this method to get the instance.
         """
-        assert camem_available, "camem allocator is not available"
         if CaMemAllocator.instance is None:
             CaMemAllocator.instance = CaMemAllocator()
         return CaMemAllocator.instance

vllm_ascend/ops/fused_moe.py

Lines changed: 0 additions & 1 deletion
@@ -155,7 +155,6 @@ def fused_experts_with_mc2(
     kwargs_mc2.update(stage1_kwargs)
 
     output = torch_npu.npu_moe_distribute_dispatch(**kwargs_mc2)
-    # comm_stream.wait_stream(torch.npu.current_stream())
     expand_x, dynamic_scale, expand_idx, expert_token_nums, ep_recv_counts = output[
         0:5]
 
vllm_ascend/patch/platform/patch_common/patch_distributed.py

Lines changed: 10 additions & 1 deletion
@@ -23,7 +23,7 @@
 import vllm.envs as envs
 from vllm.config import ParallelConfig
 
-from vllm_ascend.utils import NullHandle, is_310p
+from vllm_ascend.utils import is_310p
 
 
 def ascend_destroy_model_parallel():
@@ -66,6 +66,15 @@ def parallel_config_get_dp_port(self) -> int:
 ParallelConfig.get_next_dp_init_port = parallel_config_get_dp_port
 
 
+class NullHandle:
+
+    def __init__(self):
+        pass
+
+    def wait(self):
+        pass
+
+
 def communication_adaptation_310p():
 
     def broadcast310p(tensor, src, group=None, async_op=False):
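
NullHandle moves from vllm_ascend/utils.py to its only call site. Its role is to mimic the work handle returned by torch.distributed async collectives, so synchronous 310P code paths can still hand callers something with a .wait() method. A brief illustration with a hypothetical wrapper (do_broadcast is a stand-in, not a real API):

    def broadcast_sync(tensor, src, group=None, async_op=False):
        # Hypothetical wrapper: the collective completes synchronously, but
        # callers written for async_op=True still receive a handle whose
        # .wait() is a harmless no-op.
        do_broadcast(tensor, src, group)  # stand-in for the real collective
        return NullHandle() if async_op else None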
