4
4
import struct
5
5
import time
6
6
from dataclasses import dataclass
7
- from typing import TYPE_CHECKING , Any , List , Tuple
7
+ from typing import TYPE_CHECKING , Any , List , Optional , Tuple , Union
8
8
9
9
import requests
10
10
import torch
11
11
import torch_npu
12
- import torchair
12
+ import torchair # type: ignore
13
13
from vllm .distributed import get_tensor_model_parallel_rank , get_world_group
14
14
from vllm .distributed .kv_transfer .kv_connector .v1 .base import (
15
15
KVConnectorBase_V1 , KVConnectorMetadata , KVConnectorRole )
@@ -67,7 +67,7 @@ class ServerInfo:
67
67
role : ServerRole
68
68
devices : List [DeviceInfo ]
69
69
70
- def get_device (self , tp_rank : int , dp_rank : int ) -> DeviceInfo :
70
+ def get_device (self , tp_rank : int , dp_rank : int ) -> Union [ DeviceInfo , None ] :
71
71
for device in self .devices :
72
72
if device .tp_rank == tp_rank and device .dp_rank == dp_rank :
73
73
return device
@@ -162,7 +162,7 @@ def __init__(self, vllm_config: "VllmConfig") -> None:
162
162
GLOBAL_RANKTABLE , self .prefill_tp , self .decode_tp )
163
163
164
164
def get_device (self , server_id : str , dp_rank : int ,
165
- tp_rank : int ) -> DeviceInfo :
165
+ tp_rank : int ) -> Union [ DeviceInfo , None ] :
166
166
for server in self ._servers :
167
167
if server .server_id != server_id :
168
168
continue
@@ -225,7 +225,7 @@ def _get_first_matching_value(self, config_dict: dict,
225
225
return default
226
226
227
227
228
- _CLUSTER_INFO : "ClusterInfo" = None
228
+ _CLUSTER_INFO : Optional [ "ClusterInfo" ] = None
229
229
230
230
231
231
def init_cluster_info (vllm_config : "VllmConfig" ) -> None :
@@ -272,7 +272,7 @@ def __init__(self, role: llm_datadist.LLMRole, local_rank: int,
272
272
local_device_info = self .cluster_info .get_device (
273
273
local_server_id , dp_rank , tp_rank )
274
274
assert local_device_info is not None , \
275
- f "Could not find local device from cluster info."
275
+ "Could not find local device from cluster info."
276
276
277
277
self .cluster_id = local_device_info .cluster_id
278
278
self .local_device_ip = local_device_info .device_ip
@@ -379,8 +379,8 @@ def __init__(self, vllm_config: "VllmConfig",
379
379
self .kv_role = llm_datadist .LLMRole .DECODER
380
380
else :
381
381
raise ValueError (
382
- f "The value of kv_role must be either `kv_producer` or `kv_consumer`, but received { kv_transfer_config . kv_role } . "
383
- )
382
+ "The value of kv_role must be either `kv_producer` or"
383
+ f" `kv_consumer`, but received { kv_transfer_config . kv_role } ." )
384
384
385
385
# Used by scheduler process
386
386
self ._requests_need_load : dict [str , Request ] = {}
@@ -400,7 +400,7 @@ def __init__(self, vllm_config: "VllmConfig",
400
400
"local_server_id" , None )
401
401
assert (
402
402
self .local_server_id is not None
403
- ), f "Cannot find `local_server_id` from `kv_transfer_config.kv_connector_extra_config`."
403
+ ), "Cannot find `local_server_id` from `kv_transfer_config.kv_connector_extra_config`."
404
404
405
405
self .dp_rank = self ._vllm_config .parallel_config .data_parallel_rank
406
406
self .tp_size = self ._vllm_config .parallel_config .tensor_parallel_size
@@ -474,7 +474,7 @@ def start_load_kv(self, forward_context: "ForwardContext",
474
474
# this is the cause.
475
475
if prefill_infos is None :
476
476
logger .error (
477
- f "[rank%d][D]: Failed to get prefill info, redo model forwarding." ,
477
+ "[rank%d][D]: Failed to get prefill info, redo model forwarding." ,
478
478
torch .distributed .get_rank ())
479
479
return None
480
480
@@ -853,8 +853,8 @@ def _create_cache_tensors(self,
853
853
except LLMException as e :
854
854
if e .status_code == LLMStatusCode .LLM_DEVICE_OUT_OF_MEMORY :
855
855
logger .warning (
856
- f "allocate_cache failed due to insufficient space in the mbuf memory. "
857
- )
856
+ "allocate_cache failed due to insufficient space in the"
857
+ " mbuf memory." )
858
858
time .sleep (0.03 ) # wait for cache buf to be ready
859
859
else :
860
860
raise e
0 commit comments