1 parent 3ea2410 commit 19f97f7
vllm_ascend/worker/model_runner_v1.py
@@ -2101,7 +2101,7 @@ def check_torchair_graph_batch_sizes(self):
         if self.parallel_config.enable_expert_parallel:
             new_graph_batch_sizes = []
             for graph_batch_size in self.torchair_graph_batch_sizes:
-                cur_graph_batch_size = graph_batch_size + tp_size - graph_batch_size % tp_size
+                cur_graph_batch_size = math.ceil(graph_batch_size / tp_size) * tp_size
                 if cur_graph_batch_size not in new_graph_batch_sizes:
                     new_graph_batch_sizes.append(cur_graph_batch_size)
             self.torchair_graph_batch_sizes = new_graph_batch_sizes
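
For context, the replaced expression over-rounds when graph_batch_size is already a multiple of tp_size: it adds a full extra tp_size instead of leaving the value unchanged, while the new expression rounds up to the nearest multiple. A minimal standalone sketch of the two formulas (the values below are hypothetical, not taken from the commit):

import math

tp_size = 4

for graph_batch_size in (6, 8):
    # old expression: adds tp_size even when graph_batch_size is already aligned
    old = graph_batch_size + tp_size - graph_batch_size % tp_size
    # new expression: round up to the nearest multiple of tp_size
    new = math.ceil(graph_batch_size / tp_size) * tp_size
    print(graph_batch_size, old, new)

# 6 -> old 8,  new 8  (both round up as intended)
# 8 -> old 12, new 8  (old over-rounds an already-aligned size)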