Skip to content

Commit f9dfde0

Browse files
authored
[Bugfix] Fix broken CI (#1848)
### What this PR does / why we need it?
- Fix the breakage introduced by upstream change [#20927](vllm-project/vllm#20927)
- Fix the breakage introduced by upstream change [#20466](vllm-project/vllm#20466)
- TODO: adapt more fully to the upstream refactoring; for now the goal is only to get CI passing again

- vLLM version: v0.9.2
- vLLM main: vllm-project/vllm@11dfdf2

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
1 parent 538dd35 commit f9dfde0

File tree

4 files changed

+10
-53
lines changed

4 files changed

+10
-53
lines changed

vllm_ascend/pool/__init__.py

Lines changed: 0 additions & 16 deletions
This file was deleted.

vllm_ascend/pool/metadata.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

vllm_ascend/worker/model_runner_v1.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@
5959
KVCacheSpec)
6060
from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, LogprobsTensors,
6161
ModelRunnerOutput)
62+
from vllm.v1.pool.metadata import PoolingMetadata
6263
from vllm.v1.sample.metadata import SamplingMetadata
6364
from vllm.v1.sample.sampler import Sampler
6465
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
@@ -76,7 +77,6 @@
7677
from vllm_ascend.attention.mla_v1 import (AscendMLAMetadata,
7778
CommonAttentionMetadata)
7879
from vllm_ascend.platform import NPUPlatform
79-
from vllm_ascend.pool.metadata import PoolingMetadata
8080
from vllm_ascend.sample.rejection_sampler import AscendRejectionSampler
8181
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
8282
ProfileExecuteDuration,
@@ -571,7 +571,10 @@ def get_eagle_atten_dict(
571571

572572
# OPTIMIZATION: Start copying the block table first.
573573
# This way, we can overlap the copy with the following CPU operations.
574-
self.input_batch.block_table.commit(num_reqs)
574+
if vllm_version_is("0.9.2"):
575+
self.input_batch.block_table.commit(num_reqs)
576+
else:
577+
self.input_batch.block_table.commit_block_table(num_reqs)
575578

576579
# Get the number of scheduled tokens for each request.
577580
req_ids = self.input_batch.req_ids
@@ -902,7 +905,10 @@ def _process_reqs(
902905

903906
# OPTIMIZATION: Start copying the block table first.
904907
# This way, we can overlap the copy with the following CPU operations.
905-
self.input_batch.block_table.commit(num_reqs)
908+
if vllm_version_is("0.9.2"):
909+
self.input_batch.block_table.commit(num_reqs)
910+
else:
911+
self.input_batch.block_table.commit_block_table(num_reqs)
906912

907913
# Get the number of scheduled tokens for each request.
908914
# TODO: The Python loop can be slow. Optimize.

vllm_ascend/worker/npu_input_batch.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,14 +28,13 @@
2828
from vllm.sampling_params import SamplingParams, SamplingType
2929
from vllm.utils import swap_dict_values
3030
from vllm.v1.outputs import LogprobsTensors
31+
from vllm.v1.pool.metadata import PoolingMetadata
3132
from vllm.v1.sample.logits_processor import init_builtin_logitsprocs
3233
from vllm.v1.sample.metadata import SamplingMetadata
3334
from vllm.v1.spec_decode.utils import is_spec_decode_unsupported
3435
from vllm.v1.utils import copy_slice
3536
from vllm.v1.worker.block_table import MultiGroupBlockTable
3637

37-
from vllm_ascend.pool.metadata import PoolingMetadata
38-
3938
_SAMPLING_EPS = 1e-5
4039

4140

0 commit comments

Comments
 (0)