
Commit 5bc6e99

Author: Alex (committed)
transformers hybrid attention implementation vllm-project#17198
Handle named parameters; cleanup; actually fix parameter initialization; lint; fix comment.

Signed-off-by: Alex <alexwu@character.ai>
Parent: aea302b
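In substance, the commit replaces the dict comprehension in create_attention_instances with an explicit loop, so that per-layer keyword arguments (here, a sliding window) can be computed before each Attention object is built. Below is a minimal sketch of that pattern, under stated assumptions: FakeAttention is a stand-in for vLLM's real Attention class, and the layer indices and window size are invented for illustration.

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeAttention:
    # Stand-in for vLLM's Attention; only the fields relevant to this sketch.
    layer_idx: int
    per_layer_sliding_window: Optional[int] = None

def build_attention_instances(
        start: int, end: int,
        global_attention_layers: Optional[list[int]],
        sliding_window: Optional[int]) -> dict[int, FakeAttention]:
    # Explicit loop instead of a dict comprehension, mirroring the diff below:
    # layers listed in global_attention_layers receive the sliding window,
    # every other layer leaves it unset.
    attention_instances = {}
    for i in range(start, end):
        window = None
        if global_attention_layers is not None and i in global_attention_layers:
            assert sliding_window is not None
            window = sliding_window
        attention_instances[i] = FakeAttention(
            layer_idx=i, per_layer_sliding_window=window)
    return attention_instances

print(build_attention_instances(0, 4, [1, 3], 4096))
# {0: FakeAttention(layer_idx=0, per_layer_sliding_window=None),
#  1: FakeAttention(layer_idx=1, per_layer_sliding_window=4096), ...}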

File tree: 1 file changed (+17, -5 lines)


vllm/model_executor/models/transformers.py

Lines changed: 17 additions & 5 deletions
@@ -261,9 +261,20 @@ def create_attention_instances(self) -> dict[int, Attention]:
         num_kv_heads = self.model_config.get_num_kv_heads(self.parallel_config)
         start, end = get_pp_indices(self.config.num_hidden_layers,
                                     self.pp_rank, self.pp_size)
-        return {
-            i:
-            Attention(
+
+        attention_instances = {}
+        if hasattr(self.config, "global_attention_layers") and isinstance(
+                self.config.global_attention_layers, list):
+            global_attention_layers = self.config.global_attention_layers
+        else:
+            global_attention_layers = None
+
+        for i in range(start, end):
+            sliding_window = None
+            if i in global_attention_layers:
+                assert self.config.sliding_window is not None
+                sliding_window = self.config.sliding_window
+            attention_instances[i] = Attention(
                 num_heads=num_heads,
                 head_size=head_size,
                 # NOTE: We use Llama scale as default, if it's set by
@@ -272,9 +283,10 @@ def create_attention_instances(self) -> dict[int, Attention]:
                 num_kv_heads=num_kv_heads,
                 cache_config=self.cache_config,
                 quant_config=self.quant_config,
+                per_layer_sliding_window=sliding_window,
                 prefix=f"{i}.attn")
-            for i in range(start, end)
-        }
+
+        return attention_instances
 
     def init_buffers(self, module: nn.Module):
         """

Comments (0)