Force use of vLLM V0 in serve tests

lk-chen · lk-chen · commit f743e6b0c2f2 · 2025-03-27T11:48:24.000-07:00
Signed-off-by: Linkun Chen <github@lkchen.net>
diff --git a/release/llm_tests/serve/configs/model_config/llama_3dot1_8b_lora.yaml b/release/llm_tests/serve/configs/model_config/llama_3dot1_8b_lora.yaml
@@ -4,6 +4,10 @@ model_loading_config:
 
 accelerator_type: A10G
 
+runtime_env:
+  env_vars:
+    VLLM_USE_V1: "0"
+
 engine_kwargs:
   max_model_len: 2048
   enable_lora: true
diff --git a/release/llm_tests/serve/configs/model_config/llama_3dot1_8b_quantized_tp1.yaml b/release/llm_tests/serve/configs/model_config/llama_3dot1_8b_quantized_tp1.yaml
@@ -3,5 +3,9 @@ model_loading_config:
 
 accelerator_type: A10G
 
+runtime_env:
+  env_vars:
+    VLLM_USE_V1: "0"
+
 engine_kwargs:
   max_model_len: 8192
diff --git a/release/llm_tests/serve/configs/model_config/llama_3dot1_8b_tp2.yaml b/release/llm_tests/serve/configs/model_config/llama_3dot1_8b_tp2.yaml
@@ -3,6 +3,10 @@ model_loading_config:
 
 accelerator_type: A10G
 
+runtime_env:
+  env_vars:
+    VLLM_USE_V1: "0"
+
 engine_kwargs:
   max_model_len: 8192
   tensor_parallel_size: 2
diff --git a/release/llm_tests/serve/configs/serve_llama_3dot2_1b_s3.yaml b/release/llm_tests/serve/configs/serve_llama_3dot2_1b_s3.yaml
@@ -8,6 +8,9 @@ applications:
           accelerator_type: A10G
           engine_kwargs:
             max_model_len: 8192
+          runtime_env:
+            env_vars:
+              VLLM_USE_V1: "0"
     import_path: ray.serve.llm:build_openai_app
     name: llm-endpoint
     route_prefix: /