 import struct
 import time
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, List, Tuple
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union

 import requests
 import torch
 import torch_npu
-import torchair
+import torchair  # type: ignore
 from vllm.distributed import get_tensor_model_parallel_rank, get_world_group
 from vllm.distributed.kv_transfer.kv_connector.v1.base import (
     KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole)
@@ -67,7 +67,8 @@ class ServerInfo:
     role: ServerRole
     devices: List[DeviceInfo]

-    def get_device(self, tp_rank: int, dp_rank: int) -> DeviceInfo:
+    def get_device(self, tp_rank: int,
+                   dp_rank: int) -> Union[DeviceInfo, None]:
         for device in self.devices:
             if device.tp_rank == tp_rank and device.dp_rank == dp_rank:
                 return device
@@ -162,7 +163,7 @@ def __init__(self, vllm_config: "VllmConfig") -> None:
             GLOBAL_RANKTABLE, self.prefill_tp, self.decode_tp)

     def get_device(self, server_id: str, dp_rank: int,
-                   tp_rank: int) -> DeviceInfo:
+                   tp_rank: int) -> Union[DeviceInfo, None]:
         for server in self._servers:
             if server.server_id != server_id:
                 continue
@@ -225,7 +226,7 @@ def _get_first_matching_value(self, config_dict: dict,
         return default


-_CLUSTER_INFO: "ClusterInfo" = None
+_CLUSTER_INFO: Optional["ClusterInfo"] = None


 def init_cluster_info(vllm_config: "VllmConfig") -> None:
@@ -272,7 +273,7 @@ def __init__(self, role: llm_datadist.LLMRole, local_rank: int,
         local_device_info = self.cluster_info.get_device(
             local_server_id, dp_rank, tp_rank)
         assert local_device_info is not None, \
-            f"Could not find local device from cluster info."
+            "Could not find local device from cluster info."

         self.cluster_id = local_device_info.cluster_id
         self.local_device_ip = local_device_info.device_ip
@@ -379,8 +380,8 @@ def __init__(self, vllm_config: "VllmConfig",
             self.kv_role = llm_datadist.LLMRole.DECODER
         else:
             raise ValueError(
-                f"The value of kv_role must be either `kv_producer` or `kv_consumer`, but received {kv_transfer_config.kv_role}."
-            )
+                "The value of kv_role must be either `kv_producer` or"
+                f" `kv_consumer`, but received {kv_transfer_config.kv_role}.")

         # Used by scheduler process
         self._requests_need_load: dict[str, Request] = {}
@@ -400,7 +401,7 @@ def __init__(self, vllm_config: "VllmConfig",
             "local_server_id", None)
         assert (
             self.local_server_id is not None
-        ), f"Cannot find `local_server_id` from `kv_transfer_config.kv_connector_extra_config`."
+        ), "Cannot find `local_server_id` from `kv_transfer_config.kv_connector_extra_config`."

         self.dp_rank = self._vllm_config.parallel_config.data_parallel_rank
         self.tp_size = self._vllm_config.parallel_config.tensor_parallel_size
@@ -474,7 +475,7 @@ def start_load_kv(self, forward_context: "ForwardContext",
         # this is the cause.
         if prefill_infos is None:
             logger.error(
-                f"[rank%d][D]: Failed to get prefill info, redo model forwarding.",
+                "[rank%d][D]: Failed to get prefill info, redo model forwarding.",
                 torch.distributed.get_rank())
             return None

@@ -853,8 +854,8 @@ def _create_cache_tensors(self,
         except LLMException as e:
             if e.status_code == LLMStatusCode.LLM_DEVICE_OUT_OF_MEMORY:
                 logger.warning(
-                    f"allocate_cache failed due to insufficient space in the mbuf memory. "
-                )
+                    "allocate_cache failed due to insufficient space in the"
+                    " mbuf memory.")
                 time.sleep(0.03)  # wait for cache buf to be ready
             else:
                 raise e
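
The annotation changes above make the "not found" case explicit: both `get_device` methods can return `None`, so callers must guard before dereferencing, as the diff's own `assert local_device_info is not None` does. A minimal sketch of the pattern — the `DeviceInfo` fields and the example IP here are simplified stand-ins, not the connector's full definitions:

```python
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class DeviceInfo:
    # Simplified stand-in for the connector's DeviceInfo.
    tp_rank: int
    dp_rank: int
    device_ip: str


@dataclass
class ServerInfo:
    devices: List[DeviceInfo]

    def get_device(self, tp_rank: int,
                   dp_rank: int) -> Optional[DeviceInfo]:
        # Falls through to an implicit None when nothing matches,
        # which is what Union[DeviceInfo, None] advertises.
        for device in self.devices:
            if device.tp_rank == tp_rank and device.dp_rank == dp_rank:
                return device
        return None


server = ServerInfo(devices=[DeviceInfo(0, 0, "10.0.0.1")])
device = server.get_device(tp_rank=1, dp_rank=0)
# No device with tp_rank=1 exists, so the lookup returns None;
# callers check before use instead of hitting an AttributeError.
assert device is None
```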