Commit 7cf1c0e

Author: Changqi Lu (committed)

fix synchronize before transfer blocks

Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com>

1 parent: a7eca8f

File tree: 1 file changed (+1, −2)

vllm/worker/worker.py

Lines changed: 1 addition, 2 deletions

@@ -339,8 +339,7 @@ def _read_blocks(self, worker_input: WorkerInput) -> None:
             self.nixl_connector.read_blocks(worker_input.local_block_ids[i], worker_input.staging_block_ids[i], worker_input.remote_block_ids[i], worker_input.remote_engine_id[i])
 
     def _write_blocks(self, worker_input: WorkerInput) -> None:
-        if not self.is_driver_worker:
-            torch.cuda.synchronize() # to make sure that the blocks are ready, on driver worker we transfer after sampling, so there's no need to synchronize
+        torch.cuda.synchronize() # to make sure that the blocks are ready, on driver worker we transfer after sampling, so there's no need to synchronize
 
         for i, op_type in enumerate(worker_input.op_type):
            if op_type == MemoryOpType.WRITE:
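
For context on why the synchronize matters: CUDA kernel launches are asynchronous with respect to the host, so a host-initiated block transfer can begin while the kernels that populate those KV-cache blocks are still running. The minimal sketch below illustrates the synchronize-before-transfer pattern under stated assumptions: `produce_then_transfer`, `kv_block`, and `send_blocks` are illustrative placeholders rather than vLLM or NIXL APIs; only `torch.cuda.synchronize()` is the actual call this commit makes unconditional.

```python
import torch

def produce_then_transfer(kv_block: torch.Tensor, send_blocks) -> None:
    """Write a KV-cache block on the GPU, then hand it to a transfer engine."""
    # This enqueues work on the current CUDA stream and returns immediately;
    # the GPU may still be writing kv_block when the host moves on.
    kv_block.normal_()

    # Block the host until all queued GPU work has finished, so the transfer
    # below never ships half-written data. This is the call the commit runs
    # on every worker, not just non-driver workers.
    torch.cuda.synchronize()

    # Placeholder for the connector's transfer call (e.g. RDMA via NIXL),
    # which runs outside the compute stream and so cannot rely on CUDA
    # stream ordering to see the finished writes.
    send_blocks(kv_block)
```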
