
Commit 4c68b8f

Better support for skip_tokenizer_init=True
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
1 parent: 783921d

File tree: 5 files changed, +28 -13 lines

vllm/config.py

Lines changed: 6 additions & 2 deletions

@@ -612,6 +612,7 @@ def __post_init__(self) -> None:
         self.served_model_name = get_served_model_name(self.model,
                                                        self.served_model_name)
         self.multimodal_config = self._init_multimodal_config()
+        self.model_supports_multimodal_raw_input = self._init_model_supports_multimodal_raw_input()
         if not self.skip_tokenizer_init:
             self._verify_tokenizer_mode()

@@ -715,6 +716,9 @@ def _init_multimodal_config(self) -> Optional["MultiModalConfig"]:

         return None

+    def _init_model_supports_multimodal_raw_input(self):
+        return self.registry.supports_multimodal_raw_input(self.architectures)
+
     def _get_encoder_config(self):
         return get_sentence_transformer_tokenizer_config(
             self.model, self.revision)

@@ -1120,10 +1124,10 @@ def get_sliding_window(self) -> Optional[Union[int, list[Optional[int]]]]:
         return self.get_hf_config_sliding_window()

     def get_vocab_size(self) -> int:
-        return self.hf_text_config.vocab_size
+        return getattr(self.hf_text_config, "vocab_size", 0)

     def get_hidden_size(self) -> int:
-        return self.hf_text_config.hidden_size
+        return getattr(self.hf_text_config, "hidden_size", 0)

     @property
     def is_deepseek_mla(self) -> bool:
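
The getattr fallbacks let get_vocab_size() and get_hidden_size() tolerate text configs that do not define these attributes, returning 0 instead of raising. A minimal sketch of the pattern, using hypothetical config objects rather than vLLM classes:

    from types import SimpleNamespace

    # Hypothetical config objects for illustration only.
    full_cfg = SimpleNamespace(vocab_size=32000, hidden_size=4096)
    bare_cfg = SimpleNamespace()  # a config exposing neither attribute

    def get_vocab_size(hf_text_config) -> int:
        # Same defensive pattern as the diff: fall back to 0 when the field is absent.
        return getattr(hf_text_config, "vocab_size", 0)

    print(get_vocab_size(full_cfg))  # 32000
    print(get_vocab_size(bare_cfg))  # 0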

vllm/multimodal/registry.py

Lines changed: 1 addition & 1 deletion

@@ -266,7 +266,7 @@ def create_processor(
         if not model_config.is_multimodal_model:
             raise ValueError(f"{model_config.model} is not a multimodal model")

-        if tokenizer is None:
+        if tokenizer is None and not model_config.skip_tokenizer_init:
            tokenizer = cached_tokenizer_from_config(model_config)
         if disable_cache is None:
             mm_config = model_config.get_multimodal_config()

vllm/v1/engine/llm_engine.py

Lines changed: 9 additions & 5 deletions

@@ -82,11 +82,15 @@ def __init__(
         self.dp_group = None
         self.should_execute_dummy_batch = False

-        # Tokenizer (+ ensure liveness if running in another process).
-        self.tokenizer = init_tokenizer_from_configs(
-            model_config=vllm_config.model_config,
-            scheduler_config=vllm_config.scheduler_config,
-            lora_config=vllm_config.lora_config)
+
+        if not self.vllm_config.model_config.skip_tokenizer_init:
+            # Tokenizer (+ ensure liveness if running in another process).
+            self.tokenizer = init_tokenizer_from_configs(
+                model_config=vllm_config.model_config,
+                scheduler_config=vllm_config.scheduler_config,
+                lora_config=vllm_config.lora_config)
+        else:
+            self.tokenizer = None

         # Processor (convert Inputs --> EngineCoreRequests)
         self.processor = Processor(vllm_config=vllm_config,
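
With the tokenizer now optional, the V1 LLM engine can be constructed with skip_tokenizer_init=True and driven with pre-tokenized prompts. A minimal usage sketch, assuming prompts are supplied as token IDs and detokenization is disabled; the model name and IDs are placeholders:

    from vllm import LLM, SamplingParams

    # No tokenizer is loaded, so prompts must already be token IDs.
    llm = LLM(model="<your-model>", skip_tokenizer_init=True)

    # detokenize=False because there is no tokenizer to decode with.
    params = SamplingParams(max_tokens=16, detokenize=False)
    outputs = llm.generate({"prompt_token_ids": [1, 2, 3, 4]}, params)

    for out in outputs:
        print(out.outputs[0].token_ids)  # raw generated token IDs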

vllm/v1/engine/output_processor.py

Lines changed: 4 additions & 1 deletion

@@ -327,8 +327,11 @@ def add_request(
         if request_id in self.request_states:
             raise ValueError(f"Request id {request_id} already running.")

+        tokenizer = None if not self.tokenizer else \
+            self.tokenizer.get_lora_tokenizer(request.lora_request)
+
         req_state = RequestState.from_new_request(
-            tokenizer=self.tokenizer.get_lora_tokenizer(request.lora_request),
+            tokenizer=tokenizer,
             request=request,
             prompt=prompt,
             parent_req=parent_req,

vllm/v1/engine/processor.py

Lines changed: 8 additions & 4 deletions

@@ -375,7 +375,10 @@ def _validate_model_input(
         prompt_type: Literal["encoder", "decoder"],
     ):
         model_config = self.model_config
-        tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)
+        if model_config.skip_tokenizer_init:
+            tokenizer = None
+        else:
+            tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)

         prompt_ids = prompt_inputs["prompt_token_ids"]
         if not prompt_ids:
@@ -384,9 +387,10 @@ def _validate_model_input(
         else:
             raise ValueError(f"The {prompt_type} prompt cannot be empty")

-        max_input_id = max(prompt_ids, default=0)
-        if max_input_id > tokenizer.max_token_id:
-            raise ValueError(f"Token id {max_input_id} is out of vocabulary")
+        if tokenizer:
+            max_input_id = max(prompt_ids, default=0)
+            if max_input_id > tokenizer.max_token_id:
+                raise ValueError(f"Token id {max_input_id} is out of vocabulary")

         max_prompt_len = self.model_config.max_model_len
         if len(prompt_ids) > max_prompt_len:
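
The net effect in the processor is that the out-of-vocabulary check only runs when a tokenizer exists, while empty-prompt and length validation still apply. A standalone sketch of the guarded validation, with hypothetical names rather than the actual Processor method:

    from typing import Optional

    def validate_prompt_ids(prompt_ids: list[int],
                            max_model_len: int,
                            max_token_id: Optional[int] = None) -> None:
        # Hypothetical helper mirroring the guarded checks in the diff.
        if not prompt_ids:
            raise ValueError("The prompt cannot be empty")
        if max_token_id is not None:
            # Skipped when no tokenizer was initialized (skip_tokenizer_init=True).
            if max(prompt_ids, default=0) > max_token_id:
                raise ValueError(f"Token id {max(prompt_ids)} is out of vocabulary")
        if len(prompt_ids) > max_model_len:
            raise ValueError("The prompt is too long")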
