From 539b9fdef2115fbb7578efe114422db57165d36d Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Sun, 22 Jun 2025 15:34:18 -0700 Subject: [PATCH 1/5] add gemma-3-27b-it and qwen3_30B-A3B --- vllm-benchmarks/benchmarks/serving-tests.json | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/vllm-benchmarks/benchmarks/serving-tests.json b/vllm-benchmarks/benchmarks/serving-tests.json index 9456bb8..adddf1b 100644 --- a/vllm-benchmarks/benchmarks/serving-tests.json +++ b/vllm-benchmarks/benchmarks/serving-tests.json @@ -117,5 +117,125 @@ "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json", "num_prompts": 200 } + }, + { + "test_name": "serving_qwen3_30B-A3B_tp8_in200_out200", + "qps_list": ["inf"], + "server_parameters": { + "model": "Qwen/Qwen3-30B-A3B", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "Qwen/Qwen3-30B-A3B", + "backend": "vllm", + "dataset_name": "random", + "random_input_len": 200, + "random_output_len": 200 + } + }, + { + "test_name": "serving_qwen3_30B-A3B_tp8_in1k_out2k", + "qps_list": ["inf"], + "server_parameters": { + "model": "Qwen/Qwen3-30B-A3B", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "Qwen/Qwen3-30B-A3B", + "backend": "vllm", + "dataset_name": "random", + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_qwen3_30B-A3B_tp8_in5k_out1k", + "qps_list": ["inf"], + "server_parameters": { + "model": "Qwen/Qwen3-30B-A3B", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "Qwen/Qwen3-30B-A3B", + "backend": "vllm", + "dataset_name": "random", + "random_input_len": 5120, + "random_output_len": 1024 + } + }, + { + "test_name": "serving_google/gemma_3_27b_it_tp8_in200_out200", + "qps_list": ["inf"], + "server_parameters": { + "model": "google/gemma-3-27b-it", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "google/gemma-3-27b-it", + "backend": "vllm", + "dataset_name": "random", + "random_input_len": 200, + "random_output_len": 200 + } + }, + { + "test_name": "serving_google/gemma_3_27b_it_tp8_in1k_out2k", + "qps_list": ["inf"], + "server_parameters": { + "model": "google/gemma-3-27b-it", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "google/gemma-3-27b-it", + "backend": "vllm", + "dataset_name": "random", + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_google/gemma_3_27b_it_tp8_in5k_out1k", + "qps_list": ["inf"], + "server_parameters": { + "model": "google/gemma-3-27b-it", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "google/gemma-3-27b-it", + "backend": "vllm", + "dataset_name": "random", + "random_input_len": 5120, + "random_output_len": 1024 + } } ] From 3d3bdeccc33045fec77c13d9c33ed6d1430861aa Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Mon, 23 Jun 2025 14:25:19 -0700 Subject: [PATCH 2/5] fix test name --- vllm-benchmarks/benchmarks/serving-tests.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm-benchmarks/benchmarks/serving-tests.json b/vllm-benchmarks/benchmarks/serving-tests.json index adddf1b..02d2ed6 100644 --- a/vllm-benchmarks/benchmarks/serving-tests.json +++ b/vllm-benchmarks/benchmarks/serving-tests.json @@ -179,7 +179,7 @@ } }, { - "test_name": "serving_google/gemma_3_27b_it_tp8_in200_out200", + "test_name": "serving_gemma_3_27b_it_tp8_in200_out200", "qps_list": ["inf"], "server_parameters": { "model": "google/gemma-3-27b-it", @@ -199,7 +199,7 @@ } }, { - "test_name": "serving_google/gemma_3_27b_it_tp8_in1k_out2k", + "test_name": "serving_gemma_3_27b_it_tp8_in1k_out2k", "qps_list": ["inf"], "server_parameters": { "model": "google/gemma-3-27b-it", @@ -219,7 +219,7 @@ } }, { - "test_name": "serving_google/gemma_3_27b_it_tp8_in5k_out1k", + "test_name": "serving_gemma_3_27b_it_tp8_in5k_out1k", "qps_list": ["inf"], "server_parameters": { "model": "google/gemma-3-27b-it", From 21a97da5959bc62e6535a98b269b0b1dd1d30abf Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Fri, 18 Jul 2025 22:27:22 -0700 Subject: [PATCH 3/5] add more models to cuda and rocm benchmarks --- .../benchmarks/cuda/serving-tests.json | 296 +++++++++++++++++- .../benchmarks/rocm/serving-tests.json | 254 ++++++++++++++- 2 files changed, 548 insertions(+), 2 deletions(-) diff --git a/vllm-benchmarks/benchmarks/cuda/serving-tests.json b/vllm-benchmarks/benchmarks/cuda/serving-tests.json index 9456bb8..66b7c4d 100644 --- a/vllm-benchmarks/benchmarks/cuda/serving-tests.json +++ b/vllm-benchmarks/benchmarks/cuda/serving-tests.json @@ -78,6 +78,90 @@ "num_prompts": 200 } }, + { + "test_name": "serving_qwen3_30b_a3b_tp8_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "Qwen/Qwen3-30B-A3B", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "Qwen/Qwen3-30B-A3B", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_gemma_3_27b_it_tp8_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "google/gemma-3-27b-it", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "google/gemma-3-27b-it", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_gemma_3_4b_it_tp1_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "google/gemma-3-4b-it", + "tensor_parallel_size": 1, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "google/gemma-3-4b-it", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_qwen3_8b_tp1_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "Qwen/Qwen3-8B", + "tensor_parallel_size": 1, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "Qwen/Qwen3-8B", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } + }, { "test_name": "serving_llama4_scout_tp4_sharegpt", "qps_list": [1, 4, 16, "inf"], @@ -99,7 +183,112 @@ } }, { - "test_name": "serving_llama4_maverick_fp8_tp8", + "test_name": "serving_llama4_scout_tp4_random_in200_out200", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 200, + "random_output_len": 200 + } + }, + { + "test_name": "serving_llama4_scout_tp4_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_llama4_scout_tp4_random_in5k_out1k", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 5120, + "random_output_len": 1024 + } + }, + { + "test_name": "serving_llama4_scout_tp4_random_in10k_out500", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 11264 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 500 + } + }, + { + "test_name": "serving_llama4_scout_tp4_random_in30k_out100", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 31744 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 30720, + "random_output_len": 100 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_sharegpt", "qps_list": [1, 4, 16, "inf"], "server_parameters": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", @@ -117,5 +306,110 @@ "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json", "num_prompts": 200 } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in200_out200", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 200, + "random_output_len": 200 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in5k_out1k", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 5120, + "random_output_len": 1024 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in10k_out500", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 11264 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 10240, + "random_output_len": 500 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in30k_out100", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 31744 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 30720, + "random_output_len": 100 + } } ] diff --git a/vllm-benchmarks/benchmarks/rocm/serving-tests.json b/vllm-benchmarks/benchmarks/rocm/serving-tests.json index 02d2ed6..4bc668d 100644 --- a/vllm-benchmarks/benchmarks/rocm/serving-tests.json +++ b/vllm-benchmarks/benchmarks/rocm/serving-tests.json @@ -99,7 +99,112 @@ } }, { - "test_name": "serving_llama4_maverick_fp8_tp8", + "test_name": "serving_llama4_scout_tp4_random_in200_out200", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 200, + "random_output_len": 200 + } + }, + { + "test_name": "serving_llama4_scout_tp4_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_llama4_scout_tp4_random_in5k_out1k", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 5120, + "random_output_len": 1024 + } + }, + { + "test_name": "serving_llama4_scout_tp4_random_in10k_out500", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 11264 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 500 + } + }, + { + "test_name": "serving_llama4_scout_tp4_random_in30k_out100", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "tensor_parallel_size": 4, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 31744 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 30720, + "random_output_len": 100 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_sharegpt", "qps_list": [1, 4, 16, "inf"], "server_parameters": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", @@ -118,6 +223,111 @@ "num_prompts": 200 } }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in200_out200", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 200, + "random_output_len": 200 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in5k_out1k", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 5120, + "random_output_len": 1024 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in10k_out500", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 11264 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 10240, + "random_output_len": 500 + } + }, + { + "test_name": "serving_llama4_maverick_fp8_tp8_random_in30k_out100", + "qps_list": [10], + "server_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 31744 + }, + "client_parameters": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 30720, + "random_output_len": 100 + } + }, { "test_name": "serving_qwen3_30B-A3B_tp8_in200_out200", "qps_list": ["inf"], @@ -237,5 +447,47 @@ "random_input_len": 5120, "random_output_len": 1024 } + }, + { + "test_name": "serving_gemma_3_4b_it_tp1_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "google/gemma-3-4b-it", + "tensor_parallel_size": 1, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "google/gemma-3-4b-it", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } + }, + { + "test_name": "serving_qwen3_8b_tp1_random_in1k_out2k", + "qps_list": [10], + "server_parameters": { + "model": "Qwen/Qwen3-8B", + "tensor_parallel_size": 1, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy", + "max_model_len": 8192 + }, + "client_parameters": { + "model": "Qwen/Qwen3-8B", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 + } } ] From 1acfd130a088b8854bfa244297af26ec8f09d48e Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Sat, 19 Jul 2025 19:04:00 -0700 Subject: [PATCH 4/5] rename test --- vllm-benchmarks/benchmarks/rocm/serving-tests.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm-benchmarks/benchmarks/rocm/serving-tests.json b/vllm-benchmarks/benchmarks/rocm/serving-tests.json index 4bc668d..4c856e0 100644 --- a/vllm-benchmarks/benchmarks/rocm/serving-tests.json +++ b/vllm-benchmarks/benchmarks/rocm/serving-tests.json @@ -329,7 +329,7 @@ } }, { - "test_name": "serving_qwen3_30B-A3B_tp8_in200_out200", + "test_name": "serving_qwen3_30B_A3B_tp8_in200_out200", "qps_list": ["inf"], "server_parameters": { "model": "Qwen/Qwen3-30B-A3B", @@ -349,7 +349,7 @@ } }, { - "test_name": "serving_qwen3_30B-A3B_tp8_in1k_out2k", + "test_name": "serving_qwen3_30B_A3B_tp8_in1k_out2k", "qps_list": ["inf"], "server_parameters": { "model": "Qwen/Qwen3-30B-A3B", @@ -369,7 +369,7 @@ } }, { - "test_name": "serving_qwen3_30B-A3B_tp8_in5k_out1k", + "test_name": "serving_qwen3_30B_A3B_tp8_in5k_out1k", "qps_list": ["inf"], "server_parameters": { "model": "Qwen/Qwen3-30B-A3B", From 3d110dcc83482fa9ff2eee3710c70fb4ae257286 Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Sat, 19 Jul 2025 21:28:20 -0700 Subject: [PATCH 5/5] nit --- .../benchmarks/rocm/serving-tests.json | 103 ++---------------- 1 file changed, 12 insertions(+), 91 deletions(-) diff --git a/vllm-benchmarks/benchmarks/rocm/serving-tests.json b/vllm-benchmarks/benchmarks/rocm/serving-tests.json index 4c856e0..7b32d38 100644 --- a/vllm-benchmarks/benchmarks/rocm/serving-tests.json +++ b/vllm-benchmarks/benchmarks/rocm/serving-tests.json @@ -328,86 +328,6 @@ "random_output_len": 100 } }, - { - "test_name": "serving_qwen3_30B_A3B_tp8_in200_out200", - "qps_list": ["inf"], - "server_parameters": { - "model": "Qwen/Qwen3-30B-A3B", - "tensor_parallel_size": 8, - "swap_space": 16, - "disable_log_stats": "", - "disable_log_requests": "", - "load_format": "dummy", - "max_model_len": 8192 - }, - "client_parameters": { - "model": "Qwen/Qwen3-30B-A3B", - "backend": "vllm", - "dataset_name": "random", - "random_input_len": 200, - "random_output_len": 200 - } - }, - { - "test_name": "serving_qwen3_30B_A3B_tp8_in1k_out2k", - "qps_list": ["inf"], - "server_parameters": { - "model": "Qwen/Qwen3-30B-A3B", - "tensor_parallel_size": 8, - "swap_space": 16, - "disable_log_stats": "", - "disable_log_requests": "", - "load_format": "dummy", - "max_model_len": 8192 - }, - "client_parameters": { - "model": "Qwen/Qwen3-30B-A3B", - "backend": "vllm", - "dataset_name": "random", - "random_input_len": 1024, - "random_output_len": 2048 - } - }, - { - "test_name": "serving_qwen3_30B_A3B_tp8_in5k_out1k", - "qps_list": ["inf"], - "server_parameters": { - "model": "Qwen/Qwen3-30B-A3B", - "tensor_parallel_size": 8, - "swap_space": 16, - "disable_log_stats": "", - "disable_log_requests": "", - "load_format": "dummy", - "max_model_len": 8192 - }, - "client_parameters": { - "model": "Qwen/Qwen3-30B-A3B", - "backend": "vllm", - "dataset_name": "random", - "random_input_len": 5120, - "random_output_len": 1024 - } - }, - { - "test_name": "serving_gemma_3_27b_it_tp8_in200_out200", - "qps_list": ["inf"], - "server_parameters": { - "model": "google/gemma-3-27b-it", - "tensor_parallel_size": 8, - "swap_space": 16, - "disable_log_stats": "", - "disable_log_requests": "", - "load_format": "dummy", - "max_model_len": 8192 - }, - "client_parameters": { - "model": "google/gemma-3-27b-it", - "backend": "vllm", - "dataset_name": "random", - "random_input_len": 200, - "random_output_len": 200 - } - }, { "test_name": "serving_gemma_3_27b_it_tp8_in1k_out2k", "qps_list": ["inf"], @@ -429,11 +349,11 @@ } }, { - "test_name": "serving_gemma_3_27b_it_tp8_in5k_out1k", - "qps_list": ["inf"], + "test_name": "serving_gemma_3_4b_it_tp1_random_in1k_out2k", + "qps_list": [10], "server_parameters": { - "model": "google/gemma-3-27b-it", - "tensor_parallel_size": 8, + "model": "google/gemma-3-4b-it", + "tensor_parallel_size": 1, "swap_space": 16, "disable_log_stats": "", "disable_log_requests": "", @@ -441,19 +361,20 @@ "max_model_len": 8192 }, "client_parameters": { - "model": "google/gemma-3-27b-it", + "model": "google/gemma-3-4b-it", "backend": "vllm", "dataset_name": "random", - "random_input_len": 5120, - "random_output_len": 1024 + "num_prompts": 200, + "random_input_len": 1024, + "random_output_len": 2048 } }, { - "test_name": "serving_gemma_3_4b_it_tp1_random_in1k_out2k", + "test_name": "serving_qwen3_30b_a3b_tp8_random_in1k_out2k", "qps_list": [10], "server_parameters": { - "model": "google/gemma-3-4b-it", - "tensor_parallel_size": 1, + "model": "Qwen/Qwen3-30B-A3B", + "tensor_parallel_size": 8, "swap_space": 16, "disable_log_stats": "", "disable_log_requests": "", @@ -461,7 +382,7 @@ "max_model_len": 8192 }, "client_parameters": { - "model": "google/gemma-3-4b-it", + "model": "Qwen/Qwen3-30B-A3B", "backend": "vllm", "dataset_name": "random", "num_prompts": 200,