@@ -10,6 +10,7 @@
 from ..core.block.e2e.test_correctness_sliding_window import prep_prompts
 from ..utils import multi_gpu_test
 from .utils import check_logprobs_close
+from transformers import AutoModelForImageTextToText


 def check_implementation(
@@ -71,6 +72,27 @@ def test_models(
                          model_impl=model_impl)


+@pytest.mark.parametrize(
+    "model,model_impl",
+    [
+        ("llava-hf/llava-onevision-qwen2-0.5b-ov-hf", "transformers"),  # dynamic image length and number of patches
+        ("HuggingFaceTB/SmolVLM-256M-Instruct", "transformers"),  # has col/row special tokens between patches
+        ("Qwen/Qwen2.5-VL-3B-Instruct", "transformers"),  # pixel values from the processor are not 4D or 5D arrays
+    ])  # no custom code support because custom models don't follow the standard yet!
+def test_models_multimodal(
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    example_prompts: list[str],
+    model: str,
+    model_impl: str,
+) -> None:
+    check_implementation(hf_runner,
+                         vllm_runner,
+                         example_prompts,
+                         model,
+                         model_impl=model_impl,
+                         kwargs_ref={"auto_cls": AutoModelForImageTextToText})
+
 def test_hybrid_attention(vllm_runner: type[VllmRunner]) -> None:
     prompts, _, _ = prep_prompts(4, (800, 801))
     kwargs_ref = {"max_model_len": 8192, "enforce_eager": True}
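
For context, a rough sketch of what the new `kwargs_ref={"auto_cls": AutoModelForImageTextToText}` argument presumably does inside `check_implementation`: it is forwarded to the HF reference runner so the HF side loads the image-text-to-text model class instead of a plain causal LM, and the two backends' greedy logprobs are then compared with `check_logprobs_close`. This is a sketch under assumptions, not the PR's actual helper; the function name, the runner method names, and the `max_tokens`/`num_logprobs` values are illustrative.

# Minimal sketch (assumed, not the PR's actual check_implementation).
from typing import Any, Optional

def check_implementation_sketch(
        runner_ref,        # reference runner, e.g. HfRunner
        runner_test,       # runner under test, e.g. VllmRunner
        example_prompts: list[str],
        model: str,
        kwargs_ref: Optional[dict[str, Any]] = None,
        **kwargs,          # e.g. model_impl="transformers" for the vLLM side
) -> None:
    max_tokens, num_logprobs = 32, 5  # assumed values

    # Backend under test: vLLM running the model via the Transformers backend.
    with runner_test(model, **kwargs) as vllm_model:
        outputs_test = vllm_model.generate_greedy_logprobs(
            example_prompts, max_tokens, num_logprobs)

    # Reference: HF runner; auto_cls selects the image-text-to-text class
    # instead of the default causal-LM class for these multimodal models.
    with runner_ref(model, **(kwargs_ref or {})) as hf_model:
        outputs_ref = hf_model.generate_greedy_logprobs_limit(
            example_prompts, max_tokens, num_logprobs)

    check_logprobs_close(outputs_0_lst=outputs_ref,
                         outputs_1_lst=outputs_test,
                         name_0="ref",
                         name_1="test")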