File tree Expand file tree Collapse file tree 4 files changed +0
-303
lines changed Expand file tree Collapse file tree 4 files changed +0
-303
lines changed Original file line number Diff line number Diff line change 73
73
# Future Plan:
74
74
# Keep this patch in vllm-ascend.
75
75
#
76
- # ** File: worker/patch_common/patch_multi_step_worker.py **
77
- # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
78
- # 1. `vllm.spec_decode.multi_step_worker.MultiStepWorker.sampler_output`
79
- # Why:
80
- # There is a hard-coded CUDA check (current_platform.is_cuda_alike()) in
81
- # `MultiStepWorker.sampler_output`, and we need to use the patched `TP1DraftModelRunner` in it.
82
- # How:
83
- # Make speculative decoding extensible to different backends.
84
- # - support registering attention metadata in the set supported by spec decode
85
- # - offer an API in platform to determine whether spec decode is supported,
86
- # and deprecate is_cuda_alike in it.
87
- # Related PR (if no, explain why):
88
- # - https://github.com/vllm-project/vllm/pull/15195
89
- # - https://github.com/vllm-project/vllm-ascend/pull/395
90
- # Future Plan:
91
- # Revert it when the related PRs are merged in vllm and vllm-ascend.
92
- #
93
76
# ** File: worker/patch_common/patch_spec_decode_worker.py **
94
77
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
95
78
# 1. `vllm.spec_decode.spec_decode_worker.SpecDecodeWorker.create_worker`
Original file line number Diff line number Diff line change 20
20
import vllm_ascend .patch .worker .patch_common .patch_utils # noqa isort:skip
21
21
import vllm_ascend .patch .worker .patch_common .patch_distributed # noqa
22
22
import vllm_ascend .patch .worker .patch_common .patch_minicpm # noqa
23
- import vllm_ascend .patch .worker .patch_common .patch_multi_step_worker # noqa
24
23
import vllm_ascend .patch .worker .patch_common .patch_sampler # noqa
25
24
import vllm_ascend .patch .worker .patch_common .patch_spec_decode_worker # noqa
Load Diff This file was deleted.
Load Diff This file was deleted.
You can’t perform that action at this time.
0 commit comments