use_irope
1 parent 5f1ac1e commit 467bef1
vllm/v1/attention/backends/flashinfer.py

@@ -508,7 +508,12 @@ def __init__(
         logits_soft_cap: Optional[float] = None,
         attn_type: AttentionType = AttentionType.DECODER,
         kv_sharing_target_layer_name: Optional[int] = None,
+        use_irope: bool = False,
     ) -> None:
+        if use_irope:
+            logger.warning_once(
+                "Using irope in FlashInfer is not supported yet, it will fall"
+                " back to global attention for long context.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
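For readers outside the vLLM codebase, below is a minimal, self-contained sketch of the pattern this diff introduces: accept a use_irope flag, warn once, and fall back to global attention. The warning_once helper and the FlashInferBackendSketch class are hypothetical stand-ins (vLLM's real logger.warning_once and FlashInferImpl __init__ carry many more parameters); only the flag name and the warning text come from the commit.

import logging

logger = logging.getLogger("flashinfer_sketch")

# Messages already emitted; used to deduplicate warnings.
_warned: set[str] = set()

def warning_once(msg: str) -> None:
    # Hypothetical stand-in for vLLM's logger.warning_once:
    # emit each distinct warning message only the first time it is seen.
    if msg not in _warned:
        _warned.add(msg)
        logger.warning(msg)

class FlashInferBackendSketch:
    """Illustrative only: mirrors how the patched __init__ handles use_irope."""

    def __init__(self, use_irope: bool = False) -> None:
        if use_irope:
            warning_once(
                "Using irope in FlashInfer is not supported yet, it will fall"
                " back to global attention for long context.")
        # The flag is otherwise ignored: global attention is used regardless,
        # which is the fallback behavior the commit describes.
        self.use_irope = False

# Constructing twice with use_irope=True logs the warning only once.
FlashInferBackendSketch(use_irope=True)
FlashInferBackendSketch(use_irope=True)

Deduplicating the warning is the point of warning_once here: an attention backend constructor like this typically runs once per attention layer, so a plain logger.warning would repeat the same message dozens of times in one model load.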