use_irope
1 parent 5f1ac1e commit 467bef1
vllm/v1/attention/backends/flashinfer.py

@@ -508,7 +508,12 @@ def __init__(
         logits_soft_cap: Optional[float] = None,
         attn_type: AttentionType = AttentionType.DECODER,
         kv_sharing_target_layer_name: Optional[int] = None,
+        use_irope: bool = False,
     ) -> None:
+        if use_irope:
+            logger.warning_once(
+                "Using irope in FlashInfer is not supported yet, it will fall"
+                " back to global attention for long context.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
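For readers outside the vLLM codebase, below is a minimal, self-contained sketch of the pattern this diff introduces: accept a use_irope flag, warn once, and fall back to global attention. The warning_once helper and the FlashInferBackendSketch class are hypothetical stand-ins (vLLM's real logger.warning_once and FlashInferImpl __init__ carry many more parameters); only the flag name and the warning text come from the commit.

import logging

logger = logging.getLogger("flashinfer_sketch")

# Messages already emitted; used to deduplicate warnings.
_warned: set[str] = set()

def warning_once(msg: str) -> None:
    # Hypothetical stand-in for vLLM's logger.warning_once:
    # emit each distinct warning message only the first time it is seen.
    if msg not in _warned:
        _warned.add(msg)
        logger.warning(msg)

class FlashInferBackendSketch:
    """Illustrative only: mirrors how the patched __init__ handles use_irope."""

    def __init__(self, use_irope: bool = False) -> None:
        if use_irope:
            warning_once(
                "Using irope in FlashInfer is not supported yet, it will fall"
                " back to global attention for long context.")
        # The flag is otherwise ignored: global attention is used regardless,
        # which is the fallback behavior the commit describes.
        self.use_irope = False

# Constructing twice with use_irope=True logs the warning only once.
FlashInferBackendSketch(use_irope=True)
FlashInferBackendSketch(use_irope=True)

Deduplicating the warning is the point of warning_once here: an attention backend constructor like this typically runs once per attention layer, so a plain logger.warning would repeat the same message dozens of times in one model load.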