add qwen3 model tests

Potabk · Potabk · commit 892e873d9dd2 · 2025-07-03T14:20:32.000+08:00
Signed-off-by: wangli <wangli858794774@gmail.com>
diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml
@@ -65,6 +65,8 @@ jobs:
       options: >-
         --device /dev/davinci0
         --device /dev/davinci1
+        --device /dev/davinci2
+        --device /dev/davinci3
         --device /dev/davinci_manager
         --device /dev/devmm_svm
         --device /dev/hisi_hdc
diff --git a/benchmarks/scripts/patch_benchmark_dataset.py b/benchmarks/scripts/patch_benchmark_dataset.py
@@ -6,6 +6,8 @@
 
 # Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls
 
+VLLM_EDITABLE_PATH = "/_w/vllm-ascend/vllm-ascend/vllm-empty/vllm/benchmarks/datasets.py"
+
 
 # TODO(Potabk): Remove this patch when the issue is fixed in the upstream
 class StreamingFalseTransformer(cst.CSTTransformer):
@@ -68,10 +70,9 @@ def patch_file(path):
         description=
         "Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
     )
-    parser.add_argument(
-        "--path",
-        type=str,
-        default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
-        help="Path to the benchmark_dataset.py file")
+    parser.add_argument("--path",
+                        type=str,
+                        default=VLLM_EDITABLE_PATH,
+                        help="Path to the benchmark_dataset.py file")
     args = parser.parse_args()
     patch_file(args.path)
diff --git a/benchmarks/tests/latency-tests.json b/benchmarks/tests/latency-tests.json
@@ -19,5 +19,25 @@
       "num_iters_warmup": 5,
       "num_iters": 15
     }
+  },
+  {
+    "test_name": "latency_qwen3_30B_A3B_tp4",
+    "parameters": {
+      "model": "Qwen/Qwen3-30B-A3B",
+      "tensor_parallel_size": 4,
+      "load_format": "dummy",
+      "num_iters_warmup": 5,
+      "num_iters": 15
+    }
+  },
+  {
+    "test_name": "latency_qwen3_32B_tp4",
+    "parameters": {
+      "model": "Qwen/Qwen3-32B",
+      "tensor_parallel_size": 4,
+      "load_format": "dummy",
+      "num_iters_warmup": 5,
+      "num_iters": 15
+    }
   }
 ]
diff --git a/benchmarks/tests/serving-tests.json b/benchmarks/tests/serving-tests.json
@@ -73,5 +73,53 @@
       "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
       "num_prompts": 200
     }
+  },
+  {
+    "test_name": "serving_qwen3_30B_A3B_tp4",
+    "qps_list": [
+      1,
+      4,
+      16,
+      "inf"
+    ],
+    "server_parameters": {
+      "model": "Qwen/Qwen3-30B-A3B",
+      "tensor_parallel_size": 4,
+      "swap_space": 16,
+      "disable_log_stats": "",
+      "disable_log_requests": "",
+      "load_format": "dummy"
+    },
+    "client_parameters": {
+      "model": "Qwen/Qwen3-30B-A3B",
+      "endpoint_type": "vllm",
+      "dataset_name": "sharegpt",
+      "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
+      "num_prompts": 200
+    }
+  },
+  {
+    "test_name": "serving_qwen3_32B_tp4",
+    "qps_list": [
+      1,
+      4,
+      16,
+      "inf"
+    ],
+    "server_parameters": {
+      "model": "Qwen/Qwen3-32B",
+      "tensor_parallel_size": 4,
+      "swap_space": 16,
+      "disable_log_stats": "",
+      "disable_log_requests": "",
+      "load_format": "dummy"
+    },
+    "client_parameters": {
+      "model": "Qwen/Qwen3-32B",
+      "endpoint_type": "vllm",
+      "dataset_name": "sharegpt",
+      "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
+      "num_prompts": 200
+    }
   }
 ]
diff --git a/benchmarks/tests/throughput-tests.json b/benchmarks/tests/throughput-tests.json
@@ -33,6 +33,28 @@
       "num_prompts": 200,
       "backend": "vllm"
     }
+  },
+  {
+    "test_name": "throughput_qwen3_30B_A3B_tp4",
+    "parameters": {
+      "model": "Qwen/Qwen3-30B-A3B",
+      "tensor_parallel_size": 4,
+      "load_format": "dummy",
+      "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
+      "num_prompts": 200,
+      "backend": "vllm"
+    }
+  },
+  {
+    "test_name": "throughput_qwen3_32B_tp4",
+    "parameters": {
+      "model": "Qwen/Qwen3-32B",
+      "tensor_parallel_size": 4,
+      "load_format": "dummy",
+      "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
+      "num_prompts": 200,
+      "backend": "vllm"
+    }
   }
 ]