
Commit c623c60

Better support for skip_tokenizer_init=True
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
1 parent e7e3e6d commit c623c60
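For context: skip_tokenizer_init=True tells vLLM not to load a tokenizer at engine startup, so prompts must arrive as token IDs and outputs cannot be detokenized. A minimal usage sketch of the mode this commit hardens (the model name and token IDs are illustrative placeholders, not part of this commit):

from vllm import LLM, SamplingParams

# Sketch: run the engine without a tokenizer. The model name and token
# IDs below are placeholders for illustration only.
llm = LLM(model="facebook/opt-125m", skip_tokenizer_init=True)

# No tokenizer means prompts are raw token IDs and detokenization must
# be disabled (there is nothing to turn output IDs back into text).
params = SamplingParams(max_tokens=16, detokenize=False)
outputs = llm.generate(
    prompts=[{"prompt_token_ids": [1, 3293, 310, 263, 1243]}],
    sampling_params=params,
)
for out in outputs:
    print(out.outputs[0].token_ids)  # raw IDs; no text is produced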

File tree

5 files changed: +28 -13 lines changed


vllm/config.py

Lines changed: 6 additions & 2 deletions
@@ -642,6 +642,7 @@ def __post_init__(self) -> None:
         self.original_max_model_len = self.max_model_len
         self.max_model_len = self.get_and_verify_max_len(self.max_model_len)
         self.multimodal_config = self._init_multimodal_config()
+        self.model_supports_multimodal_raw_input = self._init_model_supports_multimodal_raw_input()
         if not self.skip_tokenizer_init:
             self._verify_tokenizer_mode()

@@ -753,6 +754,9 @@ def _init_multimodal_config(self) -> Optional["MultiModalConfig"]:

         return None

+    def _init_model_supports_multimodal_raw_input(self):
+        return self.registry.supports_multimodal_raw_input(self.architectures)
+
     def _get_encoder_config(self):
         return get_sentence_transformer_tokenizer_config(
             self.model, self.revision)

@@ -1201,10 +1205,10 @@ def get_sliding_window(self) -> Optional[Union[int, list[Optional[int]]]]:
             return self.get_hf_config_sliding_window()

     def get_vocab_size(self) -> int:
-        return self.hf_text_config.vocab_size
+        return getattr(self.hf_text_config, "vocab_size", 0)

     def get_hidden_size(self) -> int:
-        return self.hf_text_config.hidden_size
+        return getattr(self.hf_text_config, "hidden_size", 0)

     @property
     def is_deepseek_mla(self) -> bool:
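The getattr fallbacks in get_vocab_size/get_hidden_size matter because, with the tokenizer skipped, hf_text_config may not expose those attributes for some models. A minimal sketch with a hypothetical bare config object:

# Hypothetical config lacking vocab_size/hidden_size, to show the
# fallback behavior introduced above.
class BareTextConfig:
    pass

cfg = BareTextConfig()
print(getattr(cfg, "vocab_size", 0))   # -> 0 instead of AttributeError
print(getattr(cfg, "hidden_size", 0))  # -> 0 instead of AttributeError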

vllm/multimodal/registry.py

Lines changed: 1 addition & 1 deletion
@@ -266,7 +266,7 @@ def create_processor(
         if not model_config.is_multimodal_model:
             raise ValueError(f"{model_config.model} is not a multimodal model")

-        if tokenizer is None:
+        if tokenizer is None and not model_config.skip_tokenizer_init:
             tokenizer = cached_tokenizer_from_config(model_config)
         if disable_cache is None:
             mm_config = model_config.get_multimodal_config()
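The extra condition keeps create_processor from lazily loading a tokenizer the user explicitly opted out of; the processor then proceeds with tokenizer=None. The same guard in isolation (helper and parameter names are illustrative, not vLLM API):

# Illustrative guard pattern (names are placeholders, not vLLM API):
def resolve_tokenizer(tokenizer, model_config, load_cached):
    # Lazily load only when none was supplied AND tokenizer init is
    # enabled; otherwise the tokenizer legitimately stays None.
    if tokenizer is None and not model_config.skip_tokenizer_init:
        tokenizer = load_cached(model_config)
    return tokenizer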

vllm/v1/engine/llm_engine.py

Lines changed: 9 additions & 5 deletions
@@ -82,11 +82,15 @@ def __init__(
         self.dp_group = None
         self.should_execute_dummy_batch = False

-        # Tokenizer (+ ensure liveness if running in another process).
-        self.tokenizer = init_tokenizer_from_configs(
-            model_config=vllm_config.model_config,
-            scheduler_config=vllm_config.scheduler_config,
-            lora_config=vllm_config.lora_config)
+
+        if not self.vllm_config.model_config.skip_tokenizer_init:
+            # Tokenizer (+ ensure liveness if running in another process).
+            self.tokenizer = init_tokenizer_from_configs(
+                model_config=vllm_config.model_config,
+                scheduler_config=vllm_config.scheduler_config,
+                lora_config=vllm_config.lora_config)
+        else:
+            self.tokenizer = None

         # Processor (convert Inputs --> EngineCoreRequests)
         self.processor = Processor(vllm_config=vllm_config,
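With the engine now able to hold self.tokenizer = None, every downstream consumer needs a null check before touching it. A sketch of the consuming pattern (the function name is illustrative, not vLLM code):

# Illustrative consumer pattern once the tokenizer may be None:
def decode_or_passthrough(tokenizer, token_ids):
    if tokenizer is None:
        return token_ids                 # hand raw IDs back to the caller
    return tokenizer.decode(token_ids)   # normal detokenization path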

vllm/v1/engine/output_processor.py

Lines changed: 4 additions & 1 deletion
@@ -327,8 +327,11 @@ def add_request(
         if request_id in self.request_states:
             raise ValueError(f"Request id {request_id} already running.")

+        tokenizer = None if not self.tokenizer else \
+            self.tokenizer.get_lora_tokenizer(request.lora_request)
+
         req_state = RequestState.from_new_request(
-            tokenizer=self.tokenizer.get_lora_tokenizer(request.lora_request),
+            tokenizer=tokenizer,
             request=request,
             prompt=prompt,
             parent_req=parent_req,
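The backslash-continued conditional above is equivalent to this more explicit form (a readability sketch only; the committed code uses the one-liner):

if self.tokenizer:
    tokenizer = self.tokenizer.get_lora_tokenizer(request.lora_request)
else:
    tokenizer = None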

vllm/v1/engine/processor.py

Lines changed: 8 additions & 4 deletions
@@ -380,7 +380,10 @@ def _validate_model_input(
         prompt_type: Literal["encoder", "decoder"],
     ):
         model_config = self.model_config
-        tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)
+        if model_config.skip_tokenizer_init:
+            tokenizer = None
+        else:
+            tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)

         prompt_ids = prompt_inputs["prompt_token_ids"]
         if not prompt_ids:
@@ -389,9 +392,10 @@ def _validate_model_input(
         else:
             raise ValueError(f"The {prompt_type} prompt cannot be empty")

-        max_input_id = max(prompt_ids, default=0)
-        if max_input_id > tokenizer.max_token_id:
-            raise ValueError(f"Token id {max_input_id} is out of vocabulary")
+        if tokenizer:
+            max_input_id = max(prompt_ids, default=0)
+            if max_input_id > tokenizer.max_token_id:
+                raise ValueError(f"Token id {max_input_id} is out of vocabulary")

         max_prompt_len = self.model_config.max_model_len
         if len(prompt_ids) > max_prompt_len:
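Note the trade-off here: without a tokenizer there is no max_token_id to compare against, so the out-of-vocabulary check is skipped and supplying valid token IDs becomes the caller's responsibility. A caller-side pre-check might look like this (the vocabulary size is a placeholder):

# Caller-side sketch: validate prompt token IDs yourself when running
# with skip_tokenizer_init=True. VOCAB_SIZE is a placeholder value.
VOCAB_SIZE = 32_000

def check_prompt_ids(prompt_ids: list[int]) -> None:
    bad = [t for t in prompt_ids if not 0 <= t < VOCAB_SIZE]
    if bad:
        raise ValueError(f"Token ids out of vocabulary: {bad[:5]}")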
