File tree: 2 files changed (+31 -7 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -176,20 +176,22 @@ def test_embedding_model(gpu_type, model_opt_125m):
176
176
assert all ("prompt" in out for out in outs )
177
177
178
178
179
- def test_vision_model (gpu_type , model_llava_354m ):
179
+ def test_vision_model (gpu_type , model_smolvlm_256m ):
180
180
processor_config = vLLMEngineProcessorConfig (
181
- model_source = model_llava_354m ,
181
+ model_source = model_smolvlm_256m ,
182
182
task_type = "generate" ,
183
183
engine_kwargs = dict (
184
184
# Skip CUDA graph capturing to reduce startup time.
185
185
enforce_eager = True ,
186
+ # CI uses T4 GPU which does not support bfloat16.
187
+ dtype = "half" ,
186
188
),
187
189
# CI uses T4 GPU which is not supported by vLLM v1 FlashAttn.
188
- # runtime_env=dict(
189
- # env_vars=dict(
190
- # VLLM_USE_V1="1 ",
191
- # ),
192
- # ),
190
+ runtime_env = dict (
191
+ env_vars = dict (
192
+ VLLM_USE_V1 = "0 " ,
193
+ ),
194
+ ),
193
195
apply_chat_template = True ,
194
196
has_image = True ,
195
197
tokenize = False ,
Columns: original file line number | diff line number | diff line change
5
5
from typing import Generator , List
6
6
7
7
S3_ARTIFACT_URL = "https://air-example-data.s3.amazonaws.com/"
8
+ S3_ARTIFACT_LLM_OSSCI_URL = S3_ARTIFACT_URL + "rayllm-ossci/"
8
9
9
10
10
11
def download_model_from_s3 (
@@ -69,6 +70,27 @@ def model_llava_354m():
69
70
yield from download_model_from_s3 (REMOTE_URL , FILE_LIST )
70
71
71
72
73
+ @pytest .fixture (scope = "session" )
74
+ def model_smolvlm_256m ():
75
+ """The vision language model for testing."""
76
+ REMOTE_URL = f"{ S3_ARTIFACT_LLM_OSSCI_URL } smolvlm-256m-instruct/"
77
+ FILE_LIST = [
78
+ "added_tokens.json" ,
79
+ "chat_template.json" ,
80
+ "config.json" ,
81
+ "generation_config.json" ,
82
+ "merges.txt" ,
83
+ "model.safetensors" ,
84
+ "preprocessor_config.json" ,
85
+ "processor_config.json" ,
86
+ "special_tokens_map.json" ,
87
+ "tokenizer.json" ,
88
+ "tokenizer_config.json" ,
89
+ "vocab.json" ,
90
+ ]
91
+ yield from download_model_from_s3 (REMOTE_URL , FILE_LIST )
92
+
93
+
72
94
@pytest .fixture (scope = "session" )
73
95
def model_llama_3_2_216M ():
74
96
"""The llama 3.2 216M model for testing."""
You can’t perform that action at this time.
0 commit comments