We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d92d836, commit dd4e9e8 (Copy full SHA for dd4e9e8)
vllm_spyre/v1/worker/spyre_model_runner.py
@@ -879,7 +879,9 @@ def _prepare_decode(
879
slot = [start_slot + offset]
880
slot_mapping.append(slot)
881
output_token_ids = req_state.output_token_ids
882
- generation_token = output_token_ids[-1]
+ generation_token = torch.tensor(
883
+ output_token_ids[-1], dtype=torch.long, device=self.device
884
+ )
885
input_tokens.append([generation_token])
886
seq_len = cached_request_data.num_computed_tokens[
887
cached_reqs_map[req_id]]
0 commit comments