
Commit e42389f

Transformers backend already supports V1 (#15463)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
1 parent ff38f0a commit e42389f

File tree

3 files changed: +7 additions, -25 deletions
tests/models/test_transformers.py

Lines changed: 5 additions & 17 deletions
@@ -3,8 +3,6 @@
 
 Run `pytest tests/models/test_transformers.py`.
 """
-from contextlib import nullcontext
-
 import pytest
 
 from ..conftest import HfRunner, VllmRunner
@@ -42,7 +40,6 @@ def check_implementation(
     "model,model_impl",
     [
         ("meta-llama/Llama-3.2-1B-Instruct", "transformers"),
-        ("openai-community/gpt2", "transformers"),
         ("ArthurZ/Ilama-3.2-1B", "auto"),  # CUSTOM CODE
     ])  # trust_remote_code=True by default
 def test_models(
@@ -52,20 +49,11 @@ def test_models(
     model: str,
     model_impl: str,
 ) -> None:
-
-    maybe_raises = nullcontext()
-    if model == "openai-community/gpt2" and model_impl == "transformers":
-        # Model is not backend compatible
-        maybe_raises = pytest.raises(
-            ValueError,
-            match="The Transformers implementation.*not compatible with vLLM")
-
-    with maybe_raises:
-        check_implementation(hf_runner,
-                             vllm_runner,
-                             example_prompts,
-                             model,
-                             model_impl=model_impl)
+    check_implementation(hf_runner,
+                         vllm_runner,
+                         example_prompts,
+                         model,
+                         model_impl=model_impl)
 
 
 @multi_gpu_test(num_gpus=2)
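With the GPT-2 special case gone, the test simply runs check_implementation for each parametrized model. Outside the test suite, the same path can be exercised through vLLM's LLM entry point. A minimal sketch, assuming the model_impl keyword and the VLLM_USE_V1 environment variable behave as in the vLLM version this commit targets (the model name and sampling settings are illustrative, not part of the commit):

import os

from vllm import LLM, SamplingParams

# Ask for the V1 engine explicitly; after this commit the Transformers
# implementation no longer forces a fallback to V0.
os.environ["VLLM_USE_V1"] = "1"

# model_impl="transformers" selects the Transformers modelling code
# instead of vLLM's native implementation of the architecture.
llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct", model_impl="transformers")

outputs = llm.generate(["The capital of France is"],
                       SamplingParams(temperature=0.0, max_tokens=16))
print(outputs[0].outputs[0].text)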

vllm/engine/arg_utils.py

Lines changed: 0 additions & 8 deletions
@@ -1613,14 +1613,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                 recommend_to_remove=False)
             return False
 
-        # No TransformersModel support so far.
-        if (model_config.model_impl == ModelImpl.TRANSFORMERS
-                or model_config.model_impl == "transformers"):
-            _raise_or_fallback(
-                feature_name=f"model_impl={model_config.model_impl}",
-                recommend_to_remove=False)
-            return False
-
         # No Concurrent Partial Prefills so far.
         if (self.max_num_partial_prefills
                 != EngineArgs.max_num_partial_prefills
vllm/model_executor/models/transformers.py

Lines changed: 2 additions & 0 deletions
@@ -24,6 +24,7 @@
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
 
 from vllm.attention import Attention
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import (CacheConfig, DeviceConfig, ModelConfig,
                          ParallelConfig, VllmConfig)
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
@@ -109,6 +110,7 @@ def replace_linear_class(
 )
 
 
+@support_torch_compile
 class TransformersModel(nn.Module, SupportsQuant, SupportsLoRA, SupportsPP):
     embedding_padding_modules = ["lm_head"]
     embedding_modules = ["embed_tokens"
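The new @support_torch_compile decorator is what lets the V1 engine run this model under torch.compile. As a rough mental model only, a naive stand-in (not vLLM's decorator, which hooks into VllmConfig and piecewise CUDA-graph compilation), the effect is comparable to wrapping the module's forward pass in torch.compile at construction time:

import torch
from torch import nn

def naive_support_torch_compile(cls: type) -> type:
    """Illustrative class decorator: compile forward when the module is built."""
    original_init = cls.__init__

    def __init__(self, *args, **kwargs):
        original_init(self, *args, **kwargs)
        # torch.compile returns a callable that traces and optimizes forward
        # the first time it is invoked with real tensors.
        self.forward = torch.compile(self.forward)

    cls.__init__ = __init__
    return cls

@naive_support_torch_compile
class TinyModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = nn.Linear(8, 8)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.linear(x))

model = TinyModel()
print(model(torch.randn(2, 8)).shape)  # torch.Size([2, 8])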
