[llm][test] Use smolVLM in unit tests (#51856)

lk-chen · web-flow · commit bedf57913728 · 2025-03-31T15:05:40.000-07:00
Signed-off-by: Linkun Chen <github@lkchen.net>
diff --git a/python/ray/llm/tests/batch/gpu/processor/test_vllm_engine_proc.py b/python/ray/llm/tests/batch/gpu/processor/test_vllm_engine_proc.py
@@ -176,20 +176,22 @@ def test_embedding_model(gpu_type, model_opt_125m):
     assert all("prompt" in out for out in outs)
 
 
-def test_vision_model(gpu_type, model_llava_354m):
+def test_vision_model(gpu_type, model_smolvlm_256m):
     processor_config = vLLMEngineProcessorConfig(
-        model_source=model_llava_354m,
+        model_source=model_smolvlm_256m,
         task_type="generate",
         engine_kwargs=dict(
             # Skip CUDA graph capturing to reduce startup time.
             enforce_eager=True,
+            # CI uses T4 GPU which does not support bfloat16.
+            dtype="half",
         ),
         # CI uses T4 GPU which is not supported by vLLM v1 FlashAttn.
-        # runtime_env=dict(
-        #     env_vars=dict(
-        #         VLLM_USE_V1="1",
-        #     ),
-        # ),
+        runtime_env=dict(
+            env_vars=dict(
+                VLLM_USE_V1="0",
+            ),
+        ),
         apply_chat_template=True,
         has_image=True,
         tokenize=False,
diff --git a/python/ray/llm/tests/conftest.py b/python/ray/llm/tests/conftest.py
@@ -5,6 +5,7 @@
 from typing import Generator, List
 
 S3_ARTIFACT_URL = "https://air-example-data.s3.amazonaws.com/"
+S3_ARTIFACT_LLM_OSSCI_URL = S3_ARTIFACT_URL + "rayllm-ossci/"
 
 
 def download_model_from_s3(
@@ -69,6 +70,27 @@ def model_llava_354m():
     yield from download_model_from_s3(REMOTE_URL, FILE_LIST)
 
 
+@pytest.fixture(scope="session")
+def model_smolvlm_256m():
+    """The SmolVLM-256M-Instruct vision language model for testing."""
+    REMOTE_URL = f"{S3_ARTIFACT_LLM_OSSCI_URL}smolvlm-256m-instruct/"
+    FILE_LIST = [
+        "added_tokens.json",
+        "chat_template.json",
+        "config.json",
+        "generation_config.json",
+        "merges.txt",
+        "model.safetensors",
+        "preprocessor_config.json",
+        "processor_config.json",
+        "special_tokens_map.json",
+        "tokenizer.json",
+        "tokenizer_config.json",
+        "vocab.json",
+    ]
+    yield from download_model_from_s3(REMOTE_URL, FILE_LIST)
+
+
 @pytest.fixture(scope="session")
 def model_llama_3_2_216M():
     """The llama 3.2 216M model for testing."""