We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e7ad830, commit de28b86 (Copy full SHA for de28b86)
vllm/model_executor/models/mlp_speculator.py
@@ -187,9 +187,11 @@ def generate_proposals(
187
# TODO: not yet supporting top_k_tokens_per_head
188
states = states.flatten(0, 1)
189
190
- logits = self.logits_processor(
191
- self.head[head_index], states, sampling_metadata
192
- if self.sampling_metadata_is_required else None)
+ if self.logits_processor:
+ logits = self.logits_processor(self.head[head_index], states,
+ sampling_metadata)
193
+ else:
194
+ logits = self.head[head_index](states)
195
196
output = self.sampler(logits, sampling_metadata)
197
last_tokens = output.sampled_token_ids
0 commit comments