| 1 | +import math |
| 2 | +import os |
| 3 | +import unittest |
| 4 | +from threading import Lock |
| 5 | +from unittest import mock |
| 6 | + |
| 7 | +import torch |
| 8 | +from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig, |
| 9 | + VllmConfig) |
| 10 | + |
| 11 | +from vllm_ascend import utils |
| 12 | + |
| 13 | + |
| 14 | +class TestUtils(unittest.TestCase): |
| 15 | + |
| 16 | + def test_is_310p(self): |
| 17 | + utils._IS_310P = None  # reset the cached SoC check |
| 18 | + with mock.patch("vllm_ascend._build_info.__soc_version__", |
| 19 | + "Ascend310P3"): |
| 20 | + self.assertTrue(utils.is_310p()) |
| 21 | + utils._IS_310P = None |
| 22 | + with mock.patch("vllm_ascend._build_info.__soc_version__", |
| 23 | + "Ascend910P1"): |
| 24 | + self.assertFalse(utils.is_310p()) |
| 25 | + |
| 26 | + def test_sleep_mode_enabled(self): |
| 27 | + utils._SLEEP_MODE_ENABLED = None |
| 28 | + with mock.patch("vllm_ascend._build_info.__sleep_mode_enabled__", |
| 29 | + True): |
| 30 | + self.assertTrue(utils.sleep_mode_enabled()) |
| 31 | + utils._SLEEP_MODE_ENABLED = None |
| 32 | + with mock.patch("vllm_ascend._build_info.__sleep_mode_enabled__", |
| 33 | + False): |
| 34 | + self.assertFalse(utils.sleep_mode_enabled()) |
| 35 | + |
| 36 | + def test_nd_to_nz_2d(self): |
| 37 | + # dimensions divisible by 16 |
| 38 | + input_tensor = torch.randn(32, 64) |
| 39 | + output = utils.nd_to_nz_2d(input_tensor) |
| 40 | + self.assertEqual(output.shape[0], 1) |
| 41 | + self.assertEqual(output.shape[1], 64 // 16) |
| 42 | + self.assertEqual(output.shape[2], 32) |
| 43 | + self.assertEqual(output.shape[3], 16) |
| 44 | + |
| 45 | + # dimensions not divisible by 16, so padding is applied |
| 46 | + input_tensor = torch.randn(30, 62) |
| 47 | + output = utils.nd_to_nz_2d(input_tensor) |
| 48 | + self.assertEqual(output.shape[0], 1) |
| 49 | + self.assertEqual(output.shape[1], math.ceil(62 / 16)) |
| 50 | + self.assertEqual(output.shape[2], 32)  # 30 padded up to 32 |
| 51 | + self.assertEqual(output.shape[3], 16) |
| 52 | + |
| 53 | + # pad to 16 |
| 54 | + input_tensor = torch.randn(8, 12) |
| 55 | + output = utils.nd_to_nz_2d(input_tensor) |
| 56 | + self.assertEqual(output.shape[0], 1) |
| 57 | + self.assertEqual(output.shape[1], 1) # 12->16, 16//16=1 |
| 58 | + self.assertEqual(output.shape[2], 16) # 8->16 |
| 59 | + self.assertEqual(output.shape[3], 16) |
| 60 | + |
| 61 | + # check if the output is contiguous |
| 62 | + input_tensor = torch.randn(32, 64) |
| 63 | + output = utils.nd_to_nz_2d(input_tensor) |
| 64 | + self.assertTrue(output.is_contiguous()) |
| 65 | + |
| 66 | + # check if the output values are preserved |
| 67 | + input_tensor = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]]) |
| 68 | + output = utils.nd_to_nz_2d(input_tensor) |
| 69 | + expected = torch.tensor( |
| 70 | + [[[[1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 71 | + [5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 72 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 73 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 74 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 75 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 76 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 77 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 78 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 79 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 80 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 81 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 82 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 83 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 84 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 85 | + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]]]) |
| 86 | + self.assertTrue(torch.allclose(output, expected)) |
| 87 | + |
| 88 | + def test_aligned_16(self): |
| 89 | + # align to 16 |
| 90 | + input_tensor = torch.randn(15, 64) |
| 91 | + output_tensor = utils.aligned_16(input_tensor) |
| 92 | + self.assertEqual(output_tensor.shape[0], 16) |
| 93 | + |
| 94 | + # already aligned to 16 |
| 95 | + input_tensor = torch.randn(16, 64) |
| 96 | + output_tensor = utils.aligned_16(input_tensor) |
| 97 | + self.assertEqual(output_tensor.shape[0], 16) |
| 98 | + self.assertTrue(torch.equal(input_tensor, output_tensor)) |
| 99 | + |
| 100 | + # align to 32 |
| 101 | + input_tensor = torch.randn(17, 64) |
| 102 | + output_tensor = utils.aligned_16(input_tensor) |
| 103 | + self.assertEqual(output_tensor.shape[0], 32) |
| 104 | + |
| 105 | + @mock.patch('importlib.util.find_spec') |
| 106 | + @mock.patch('importlib.import_module') |
| 107 | + def test_try_register_lib(self, mock_import_module, mock_find_spec): |
| 108 | + # import OK |
| 109 | + mock_find_spec.return_value = mock.MagicMock() |
| 110 | + mock_import_module.return_value = mock.MagicMock() |
| 111 | + lib_name = "existing_lib" |
| 112 | + lib_info = "Library found and imported successfully" |
| 113 | + utils.try_register_lib(lib_name, lib_info) |
| 114 | + mock_find_spec.assert_called_once_with(lib_name) |
| 115 | + mock_import_module.assert_called_once_with(lib_name) |
| 116 | + |
| 117 | + # Can't find lib |
| 118 | + mock_find_spec.return_value = None |
| 119 | + lib_name = "non_existing_lib" |
| 120 | + utils.try_register_lib(lib_name) |
| 121 | + self.assertEqual(2, mock_find_spec.call_count) |
| 122 | + self.assertEqual(1, mock_import_module.call_count) |
| 123 | + |
| 124 | + # import error |
| 125 | + mock_find_spec.return_value = mock.MagicMock() |
| 126 | + mock_import_module.side_effect = ImportError("import error") |
| 127 | + lib_name = "error_lib" |
| 128 | + utils.try_register_lib(lib_name) |
| 129 | + self.assertEqual(3, mock_find_spec.call_count) |
| 130 | + self.assertEqual(2, mock_import_module.call_count) |
| 131 | + |
| 132 | + def test_enable_custom_op(self): |
| 133 | + result = utils.enable_custom_op() |
| 134 | + self.assertTrue(result) |
| 135 | + |
| 136 | + utils._CUSTOM_OP_ENABLED = None  # reset cached flag so the import path runs again |
| 137 | + |
| 138 | + with mock.patch('builtins.__import__') as mock_import_module: |
| 139 | + mock_import_module.side_effect = ImportError("import error") |
| 140 | + self.assertFalse(utils.enable_custom_op()) |
| 141 | + |
| 142 | + def test_find_hccl_library(self): |
| 143 | + with mock.patch.dict(os.environ, |
| 144 | + {"HCCL_SO_PATH": "/path/to/hccl/libhccl.so"}): |
| 145 | + self.assertEqual(utils.find_hccl_library(), |
| 146 | + "/path/to/hccl/libhccl.so") |
| 147 | + with mock.patch("torch.version.cann", None): |
| 148 | + self.assertRaises(ValueError, utils.find_hccl_library) |
| 149 | + with mock.patch("torch.version.cann", "Ascend910"): |
| 150 | + self.assertEqual(utils.find_hccl_library(), "libhccl.so") |
| 151 | + |
| 152 | + def test_current_stream(self): |
| 153 | + with mock.patch("torch.npu.current_stream") as mock_current_stream: |
| 154 | + self.assertEqual(utils.current_stream(), mock_current_stream()) |
| 155 | + |
| 156 | + def test_vllm_version_is(self): |
| 157 | + with mock.patch.dict(os.environ, {"VLLM_VERSION": "1.0.0"}): |
| 158 | + with mock.patch("vllm.__version__", "1.0.0"): |
| 159 | + self.assertTrue(utils.vllm_version_is("1.0.0")) |
| 160 | + self.assertFalse(utils.vllm_version_is("2.0.0")) |
| 161 | + with mock.patch("vllm.__version__", "2.0.0"): |
| 162 | + self.assertTrue(utils.vllm_version_is("1.0.0")) |
| 163 | + self.assertFalse(utils.vllm_version_is("2.0.0")) |
| 164 | + with mock.patch("vllm.__version__", "1.0.0"): |
| 165 | + self.assertTrue(utils.vllm_version_is("1.0.0")) |
| 166 | + self.assertFalse(utils.vllm_version_is("2.0.0")) |
| 167 | + with mock.patch("vllm.__version__", "2.0.0"): |
| 168 | + self.assertTrue(utils.vllm_version_is("2.0.0")) |
| 169 | + self.assertFalse(utils.vllm_version_is("1.0.0")) |
| 170 | + |
| 171 | + def test_update_aclgraph_sizes(self): |
| 172 | + # max_num_batch_sizes < len(original_sizes) |
| 173 | + test_compilation_config = CompilationConfig( |
| 174 | + cudagraph_capture_sizes=[i for i in range(150)]) |
| 175 | + model_path = os.path.join(os.path.dirname(__file__), "fake_weight") |
| 176 | + test_model_config = ModelConfig(model=model_path, enforce_eager=True) |
| 177 | + test_parallel_config = ParallelConfig() |
| 178 | + test_vllm_config = VllmConfig( |
| 179 | + model_config=test_model_config, |
| 180 | + compilation_config=test_compilation_config, |
| 181 | + parallel_config=test_parallel_config, |
| 182 | + ) |
| 183 | + utils.update_aclgraph_sizes(test_vllm_config) |
| 184 | + self.assertEqual( |
| 185 | + 147, |
| 186 | + len(test_vllm_config.compilation_config.cudagraph_capture_sizes)) |
| 187 | + # max_num_batch_sizes >= len(original_sizes) |
| 188 | + test_compilation_config = CompilationConfig( |
| 189 | + cudagraph_capture_sizes=[1, 2, 3]) |
| 190 | + test_vllm_config = VllmConfig( |
| 191 | + model_config=test_model_config, |
| 192 | + compilation_config=test_compilation_config, |
| 193 | + parallel_config=test_parallel_config, |
| 194 | + ) |
| 195 | + utils.update_aclgraph_sizes(test_vllm_config) |
| 196 | + self.assertEqual( |
| 197 | + 3, |
| 198 | + len(test_vllm_config.compilation_config.cudagraph_capture_sizes)) |
| 199 | + |
| 200 | + |
| 201 | +class TestProfileExecuteDuration(unittest.TestCase): |
| 202 | + |
| 203 | + def setUp(self): |
| 204 | + utils.ProfileExecuteDuration._instance = None  # reset singleton state between tests |
| 205 | + utils.ProfileExecuteDuration._observations = [] |
| 206 | + utils.ProfileExecuteDuration._lock = Lock() |
| 207 | + |
| 208 | + def test_singleton_creation(self): |
| 209 | + instance1 = utils.ProfileExecuteDuration() |
| 210 | + self.assertIsNotNone(instance1) |
| 211 | + self.assertIs(instance1, utils.ProfileExecuteDuration._instance) |
| 212 | + |
| 213 | + instance2 = utils.ProfileExecuteDuration() |
| 214 | + self.assertIs(instance1, instance2) |
| 215 | + |
| 216 | + def test_thread_safety(self): |
| 217 | + from threading import Thread |
| 218 | + |
| 219 | + instances = [] |
| 220 | + |
| 221 | + def create_instance(): |
| 222 | + instances.append(utils.ProfileExecuteDuration()) |
| 223 | + |
| 224 | + threads = [Thread(target=create_instance) for _ in range(10)] |
| 225 | + for t in threads: |
| 226 | + t.start() |
| 227 | + for t in threads: |
| 228 | + t.join() |
| 229 | + |
| 230 | + first_instance = instances[0] |
| 231 | + for instance in instances[1:]: |
| 232 | + self.assertIs(first_instance, instance) |
| 233 | + |
| 234 | + def test_atexit_registration(self): |
| 235 | + with mock.patch('atexit.register') as mock_register: |
| 236 | + instance = utils.ProfileExecuteDuration() |
| 237 | + mock_register.assert_called_once_with(instance.destroy) |
| 238 | + |
| 239 | + def test_lock_usage(self): |
| 240 | + original_lock = utils.ProfileExecuteDuration._lock |
| 241 | + |
| 242 | + with mock.patch.object(utils.ProfileExecuteDuration, |
| 243 | + '_lock', |
| 244 | + wraps=original_lock) as mock_lock: |
| 245 | + utils.ProfileExecuteDuration() |
| 246 | + mock_lock.__enter__.assert_called() |
| 247 | + mock_lock.__exit__.assert_called() |
| 248 | + |
| 249 | + def test_observations_initialization(self): |
| 250 | + instance = utils.ProfileExecuteDuration() |
| 251 | + self.assertEqual(instance._observations, []) |