
Commit be850dc

address some comments
Signed-off-by: raushan <raushan@huggingface.co>
1 parent 8d5d67e commit be850dc

4 files changed: +29 -15 lines

tests/models/test_transformers.py

Lines changed: 8 additions & 6 deletions
@@ -75,12 +75,14 @@ def test_models(
 @pytest.mark.parametrize(
     "model,model_impl",
     [
-        ("llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
-         "transformers"),  # dynamic image length and number of patches
-        ("HuggingFaceTB/SmolVLM-256M-Instruct",
-         "transformers"),  # has col/row special token between patches
-        ("Qwen/Qwen2.5-VL-3B-Instruct", "transformers"
-         ),  # pixel values from processor are not 4D or 5D arraya
+        # Dynamic image length and number of patches
+        ("llava-hf/llava-onevision-qwen2-0.5b-ov-hf", "transformers"),
+        # Has col/row special token between patches
+        ("HuggingFaceTB/SmolVLM-256M-Instruct", "transformers"),
+        # Pixel values from processor are not 4D or 5D arrays
+        ("Qwen/Qwen2.5-VL-3B-Instruct", "transformers"),
+        # Check "auto" with fallback to transformers
+        ("BAAI/Emu3-Chat-hf", "auto"),
     ]
 )  # no custom code support because custom models don't follow the standard yet!
 def test_models_multimodal(
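For context, a minimal sketch (not part of this commit) of how the newly added "auto" entry can be exercised outside the test harness via vLLM's model_impl switch; the prompt and sampling settings are illustrative assumptions only:

from vllm import LLM, SamplingParams

# "auto" lets vLLM pick a native implementation if one exists and fall back
# to the generic Transformers backend otherwise (the case tested above).
llm = LLM(model="BAAI/Emu3-Chat-hf", model_impl="auto")

# Plain-text smoke test; the parametrized test instead feeds images through
# the HF processor path.
outputs = llm.generate(["Describe a cat."], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)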

vllm/model_executor/model_loader/utils.py

Lines changed: 14 additions & 2 deletions
@@ -200,7 +200,13 @@ def resolve_transformers_arch(model_config: ModelConfig,
                 raise ValueError(
                     f"The Transformers implementation of {arch} is not "
                     "compatible with vLLM.")
-            architectures[i] = "TransformersForMultimodalLM"
+            # Check if text-config is `self`. If not, it is most probably
+            # a composite config, i.e. multimodal
+            if model_config.hf_config.get_text_config(
+            ) != model_config.hf_config:
+                architectures[i] = "TransformersForMultimodalLM"
+            else:
+                architectures[i] = "TransformersForCausalLM"
         if model_config.model_impl == ModelImpl.AUTO:
             if not model_module.is_backend_compatible():
                 raise ValueError(
@@ -211,7 +217,13 @@ def resolve_transformers_arch(model_config: ModelConfig,
                 "%s has no vLLM implementation, falling back to Transformers "
                 "implementation. Some features may not be supported and "
                 "performance may not be optimal.", arch)
-            architectures[i] = "TransformersForMultimodalLM"
+            # Check if text-config is `self`. If not, it is most probably
+            # a composite config, i.e. multimodal
+            if model_config.hf_config.get_text_config(
+            ) != model_config.hf_config:
+                architectures[i] = "TransformersForMultimodalLM"
+            else:
+                architectures[i] = "TransformersForCausalLM"
     return architectures
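The new branch relies on PretrainedConfig.get_text_config(): a text-only model's config returns itself, while a composite (multimodal) config returns its nested text config. A standalone sketch of that check, with the model names below chosen only as examples:

from transformers import AutoConfig

def is_multimodal(model_name: str) -> bool:
    config = AutoConfig.from_pretrained(model_name)
    # Composite configs (e.g. vision-language models) nest a separate
    # text config; text-only configs return themselves.
    return config.get_text_config() is not config

print(is_multimodal("facebook/opt-125m"))                           # False
print(is_multimodal("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"))   # True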

vllm/model_executor/models/transformers.py

Lines changed: 5 additions & 5 deletions
@@ -318,11 +318,11 @@ def apply(
         hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
 
         (prompt_ids, processed_data,
-            mm_token_type_ids) = self._apply_hf_processor_text_mm(
-                prompt_text=prompt,
-                mm_items=mm_items,
-                hf_processor_mm_kwargs=hf_processor_mm_kwargs,
-            )
+         mm_token_type_ids) = self._apply_hf_processor_text_mm(
+             prompt_text=prompt,
+             mm_items=mm_items,
+             hf_processor_mm_kwargs=hf_processor_mm_kwargs,
+         )
 
         # HF processor will return `mm_token_type_ids` from which
         # we can infer mm_placeholders. Until then hardcode to make code run

vllm/v1/engine/mm_input_cache.py

Lines changed: 2 additions & 2 deletions
@@ -34,8 +34,8 @@ class MirroredProcessingCache:
 
     def __init__(self, model_config):
         mm_config = model_config.multimodal_config
-        disable_mm_preprocessor_cache = (
-            mm_config is not None and mm_config.disable_mm_preprocessor_cache)
+        disable_mm_preprocessor_cache = mm_config is not None and \
+            not mm_config.disable_mm_preprocessor_cache
         self.use_cache = not disable_mm_preprocessor_cache
         self.mm_cache = ProcessingCache.get_lru_cache(VLLM_MM_INPUT_CACHE_GIB,
                                                       MultiModalKwargs)
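As a quick reference, a standalone sketch (using SimpleNamespace as a stand-in for the multimodal config object, which is an assumption for illustration) of how the rewritten expression resolves use_cache for the possible config states:

from types import SimpleNamespace

def resolve_use_cache(mm_config):
    # Mirrors the expression above: the flag is set only when a multimodal
    # config exists and its disable_mm_preprocessor_cache field is False.
    disable_mm_preprocessor_cache = mm_config is not None and \
        not mm_config.disable_mm_preprocessor_cache
    return not disable_mm_preprocessor_cache

print(resolve_use_cache(None))                                                  # True
print(resolve_use_cache(SimpleNamespace(disable_mm_preprocessor_cache=True)))   # True
print(resolve_use_cache(SimpleNamespace(disable_mm_preprocessor_cache=False)))  # False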
