File tree (expand / collapse): 1 file changed, +4 -4 lines changed
vllm/model_executor/models (expand / collapse file tree): 1 file changed, +4 -4 lines changed
Columns: Original file line number | Diff line number | Diff line change
@@ -112,8 +112,10 @@ def __init__(self,
112
112
113
113
# disable sliding window for the second half of the model
114
114
sliding_window = config .interleaved_sliding_window [layer_idx ]
115
- if layer_idx >= config .num_hidden_layers // 2 or layer_idx % 2 == 0 :
116
- assert sliding_window == None , "sliding_window is not none"
115
+ if layer_idx >= config .num_hidden_layers // 2 :
116
+ assert sliding_window is None , "sliding_window must be none for the second decoder"
117
+ else :
118
+ assert sliding_window is not None , "sliding_window must be set for the first decoder"
117
119
118
120
assert self .num_heads % 2 == 0 , 'num_heads should be even'
119
121
assert self .num_key_value_heads % 2 == 0 , 'num_heads should be even'
@@ -397,12 +399,10 @@ def __init__(self,
397
399
self .input_layernorm = nn .LayerNorm (config .hidden_size , eps = config .layer_norm_eps )
398
400
399
401
self .yoco_mb = False
400
- self .yoco_kv = False
401
402
self .yoco_cross = False
402
403
assert config .num_hidden_layers % 4 == 0 , 'n_layer should be divisible by 4 for SambaY + yoco'
403
404
if layer_idx >= config .num_hidden_layers // 2 :
404
405
self .yoco_mb = True
405
- self .yoco_kv = (layer_idx >= (config .num_hidden_layers // 2 + 1 ))
406
406
self .yoco_cross = (layer_idx >= (config .num_hidden_layers // 2 + 2 ))
407
407
self .use_mamba = config .mb_per_layer > 0 and layer_idx % config .mb_per_layer == 0
408
408
if self .use_mamba :
You can’t perform that action at this time.
0 commit comments