@@ -40,9 +40,8 @@
 from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
-                        GiB_bytes, LazyLoader, async_tensor_h2d, cdiv,
-                        check_use_alibi, get_dtype_size,
-                        is_pin_memory_available)
+                        GiB_bytes, LazyLoader, cdiv, check_use_alibi,
+                        get_dtype_size, is_pin_memory_available)
 from vllm.v1.attention.backends.mamba_attn import Mamba2AttentionBackend
 from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
                                               CommonAttentionMetadata)
@@ -1694,7 +1693,7 @@ def execute_model(
             finished_recving=finished_recving,
             num_nans_in_logits=num_nans_in_logits,
         )
-
+
     def get_valid_sampled_token_ids(
             self, max_gen_len: int, sampled_token_ids: torch.Tensor,
            discard_sampled_tokens_req_indices: np.ndarray) -> list[list[int]]:
@@ -1715,7 +1714,6 @@ def get_valid_sampled_token_ids(
 
         return valid_sampled_token_ids
 
-
     def kv_connector_no_forward(
             self, scheduler_output: "SchedulerOutput") -> ModelRunnerOutput:
         # KV send/recv even if no work to do.
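The diff shows only the signature of the new get_valid_sampled_token_ids helper, not its body. Below is a minimal sketch of what a helper with this shape could do, written as a free function for self-containment (the real method takes self). It assumes sampled_token_ids has shape [num_reqs, max_gen_len] and that invalid speculative positions are marked with a negative placeholder id; both are assumptions, not details taken from this commit.

import numpy as np
import torch


def get_valid_sampled_token_ids(
        max_gen_len: int, sampled_token_ids: torch.Tensor,
        discard_sampled_tokens_req_indices: np.ndarray) -> list[list[int]]:
    if max_gen_len == 1:
        # Common case: exactly one sampled token per request,
        # no speculative-decoding tokens to validate.
        valid_sampled_token_ids = sampled_token_ids.tolist()
    else:
        # Hypothetical handling of speculative decoding: drop placeholder
        # entries (assumed here to be negative) left by rejected drafts.
        valid_sampled_token_ids = [
            [t for t in row if t >= 0]
            for row in sampled_token_ids.tolist()
        ]
    # Requests whose outputs should be discarded (e.g. still-prefilling
    # requests) get an empty token list rather than being dropped, so
    # batch indices stay aligned.
    for i in discard_sampled_tokens_req_indices:
        valid_sampled_token_ids[i].clear()
    return valid_sampled_token_ids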