From 37f1170dfb26ffedab28f320c4cd83473a863eb4 Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 8 Jul 2025 17:09:40 +0800 Subject: [PATCH 1/3] upgrade ci npu to 64g per chip Signed-off-by: wangli --- .github/workflows/accuracy_test.yaml | 4 ++-- .github/workflows/vllm_ascend_doctest.yaml | 2 +- .github/workflows/vllm_ascend_test.yaml | 6 +++--- .github/workflows/vllm_ascend_test_long_term.yaml | 2 +- benchmarks/scripts/run_accuracy.py | 12 ++++++------ 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml index 6086bbb457..29f5a7f53f 100644 --- a/.github/workflows/accuracy_test.yaml +++ b/.github/workflows/accuracy_test.yaml @@ -85,8 +85,8 @@ jobs: }} runs-on: >- ${{ - (matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arm64-npu-4') || - 'linux-arm64-npu-2' + (matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arrch64-a2-2') || + 'linux-arrch64-a2-1' }} strategy: matrix: diff --git a/.github/workflows/vllm_ascend_doctest.yaml b/.github/workflows/vllm_ascend_doctest.yaml index f26df372cd..462e67b1ff 100644 --- a/.github/workflows/vllm_ascend_doctest.yaml +++ b/.github/workflows/vllm_ascend_doctest.yaml @@ -48,7 +48,7 @@ jobs: matrix: vllm_verison: [v0.9.1-dev, v0.9.1-dev-openeuler, main, main-openeuler] name: vLLM Ascend test - runs-on: linux-arm64-npu-1 + runs-on: linux-arrch64-a2-1 container: image: m.daocloud.io/quay.io/ascend/vllm-ascend:${{ matrix.vllm_verison }} steps: diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 236b10f13c..9bb809296d 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -200,7 +200,7 @@ jobs: strategy: max-parallel: 2 matrix: - os: [linux-arm64-npu-1] + os: [linux-arrch64-a2-1] vllm_version: [main, v0.9.2] name: singlecard e2e test runs-on: ${{ matrix.os }} @@ -299,9 +299,9 @@ jobs: needs: [e2e] if: ${{ needs.e2e.result == 'success' }} strategy: - max-parallel: 1 + max-parallel: 2 matrix: - os: [linux-arm64-npu-4] + os: [linux-arrch64-a2-4] vllm_version: [main, v0.9.2] name: multicard e2e test runs-on: ${{ matrix.os }} diff --git a/.github/workflows/vllm_ascend_test_long_term.yaml b/.github/workflows/vllm_ascend_test_long_term.yaml index 9a33b3aca8..c328d6e131 100644 --- a/.github/workflows/vllm_ascend_test_long_term.yaml +++ b/.github/workflows/vllm_ascend_test_long_term.yaml @@ -42,7 +42,7 @@ jobs: strategy: max-parallel: 2 matrix: - os: [linux-arm64-npu-1, linux-arm64-npu-4] + os: [linux-arrch64-a2-1, linux-arm64-npu-2] vllm_version: [main, v0.9.2] name: vLLM Ascend long term test runs-on: ${{ matrix.os }} diff --git a/benchmarks/scripts/run_accuracy.py b/benchmarks/scripts/run_accuracy.py index 2922c52c6c..1d60cb479c 100644 --- a/benchmarks/scripts/run_accuracy.py +++ b/benchmarks/scripts/run_accuracy.py @@ -50,17 +50,17 @@ # Command templates for running evaluations MODEL_RUN_INFO = { "Qwen/Qwen3-30B-A3B": - ("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True'\n" + ("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6,enable_expert_parallel=True'\n" "lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n" "--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1" ), "Qwen/Qwen3-8B-Base": - ("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n" + ("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=1,gpu_memory_utilization=0.6'\n" "lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n" "--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1" ), "Qwen/Qwen2.5-VL-7B-Instruct": - ("export MODEL_ARGS='pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2'\n" + ("export MODEL_ARGS='pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=1,max_images=2'\n" "lm_eval --model vllm-vlm --model_args $MODEL_ARGS --tasks {datasets} \ \n" "--apply_chat_template --fewshot_as_multiturn --batch_size 1"), } @@ -102,11 +102,11 @@ # Model arguments for evaluation MODEL_ARGS = { "Qwen/Qwen3-8B-Base": - "pretrained=Qwen/Qwen3-8B-Base,max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6", + "pretrained=Qwen/Qwen3-8B-Base,max_model_len=4096,dtype=auto,tensor_parallel_size=1,gpu_memory_utilization=0.6", "Qwen/Qwen2.5-VL-7B-Instruct": - "pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2", + "pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_model_len=8192,dtype=auto,tensor_parallel_size=1,max_images=2", "Qwen/Qwen3-30B-A3B": - "pretrained=Qwen/Qwen3-30B-A3B,max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True" + "pretrained=Qwen/Qwen3-30B-A3B,max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6,enable_expert_parallel=True" } # Whether to apply chat template formatting From 50c51566adec79cb5aee01ef7370431d3b75c474 Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 8 Jul 2025 17:12:16 +0800 Subject: [PATCH 2/3] fix Signed-off-by: wangli --- .github/workflows/vllm_ascend_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 9bb809296d..3933cc2cd5 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -301,7 +301,7 @@ jobs: strategy: max-parallel: 2 matrix: - os: [linux-arrch64-a2-4] + os: [linux-arrch64-a2-2] vllm_version: [main, v0.9.2] name: multicard e2e test runs-on: ${{ matrix.os }} From 5fd5d75b76ebddc945affa2e1964ef8c682bb73a Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 8 Jul 2025 17:15:06 +0800 Subject: [PATCH 3/3] fix typo Signed-off-by: wangli --- .github/workflows/vllm_ascend_test_long_term.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/vllm_ascend_test_long_term.yaml b/.github/workflows/vllm_ascend_test_long_term.yaml index c328d6e131..788a98a371 100644 --- a/.github/workflows/vllm_ascend_test_long_term.yaml +++ b/.github/workflows/vllm_ascend_test_long_term.yaml @@ -42,7 +42,7 @@ jobs: strategy: max-parallel: 2 matrix: - os: [linux-arrch64-a2-1, linux-arm64-npu-2] + os: [linux-arrch64-a2-1, linux-arrch64-a2-2] vllm_version: [main, v0.9.2] name: vLLM Ascend long term test runs-on: ${{ matrix.os }}