@@ -1100,24 +1100,29 @@ def get_allowed_mm_limits(self) -> Mapping[str, int]:
1100
1100
1101
1101
return allowed_limits
1102
1102
1103
- def get_max_tokens_per_item (
1104
- self , seq_len : int ,
1105
- mm_counts : Optional [Mapping [str ,
1106
- int ]]) -> Optional [Mapping [str , int ]]:
1107
- """Return the maximum number of tokens per item of for each modality.
1108
- By default, returns `None`. When `None` is returned, vLLM will generate
1109
- dummy inputs (images/videos) at maximum possible sizes and process them
1110
- to determine the maximum token count per modality.
1103
+ def get_mm_max_tokens_per_item (
1104
+ self ,
1105
+ seq_len : int ,
1106
+ mm_counts : Mapping [str , int ],
1107
+ ) -> Optional [Mapping [str , int ]]:
1108
+ """
1109
+ Return the maximum number of tokens per item for each modality.
1110
+
1111
+ When `None` (the default) is returned, vLLM will generate dummy inputs
1112
+ (images/videos) at maximum possible sizes and process them to determine
1113
+ the maximum token count per modality.
1114
+
1111
1115
This approach works but can be very slow for certain models (e.g.,
1112
1116
Qwen2.5-VL), leading to very long startup time. For better performance,
1113
1117
each model can override this method to return pre-computed maximum token
1114
1118
counts, avoiding the need for dummy input generation and processing.
1115
1119
1116
- NOTE: The maximum number of tokens per item of each modality returned
1117
- from this function should respect to the model maximum sequence length
1118
- and the maximum number of items of each modality allowed, and agrees
1119
- with dummy inputs (images/videos) at maximum possible sizes.
1120
-
1120
+ Note:
1121
+ The maximum number of tokens per item of each modality returned
1122
+ from this function should respect the model's maximum sequence
1123
+ length and the maximum number of items of each modality allowed,
1124
+ and agree with dummy inputs (images/videos) at maximum possible
1125
+ sizes.
1121
1126
"""
1122
1127
return None
1123
1128
0 commit comments