
Commit 6be275d

llama : update WavTokenizer to non-causal attn
Parent: 017763a

2 files changed: +3 −0 lines changed

convert_hf_to_gguf.py

Lines changed: 2 additions & 0 deletions
@@ -2069,6 +2069,8 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_convnext_embedding_length(self.hparams["convnext"]["n_embd"])
         self.gguf_writer.add_convnext_block_count     (self.hparams["convnext"]["n_layer"])
 
+        self.gguf_writer.add_causal_attention(False)
+
 
 @Model.register("Qwen2MoeForCausalLM")
 class Qwen2MoeModel(Model):
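For reference, add_causal_attention() is part of the gguf Python package bundled with llama.cpp; it stores a boolean under the architecture-scoped "<arch>.attention.causal" metadata key. A minimal standalone sketch of writing that key (the output path is hypothetical, and "wavtokenizer-dec" is assumed to be the architecture name used for the WavTokenizer decoder):

    import gguf

    # Write a GGUF file whose metadata marks the model's attention as
    # non-causal (bidirectional). Path and arch string are illustrative.
    writer = gguf.GGUFWriter("wavtokenizer.gguf", arch="wavtokenizer-dec")
    writer.add_causal_attention(False)  # "wavtokenizer-dec.attention.causal" = false
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()      # no tensors in this sketch
    writer.close()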

src/llama.cpp

Lines changed: 1 addition & 0 deletions
@@ -6393,6 +6393,7 @@ static void llm_load_hparams(
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS,    hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_GROUPNORM_EPS,    hparams.f_norm_group_eps);
                 ml.get_key(LLM_KV_ATTENTION_GROUPNORM_GROUPS, hparams.n_norm_groups);
+                ml.get_key(LLM_KV_ATTENTION_CAUSAL,           hparams.causal_attn);
             } break;
         default: (void)0;
     }
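
On the inference side, hparams.causal_attn determines how the KQ mask is built: with causal attention each token may only attend to itself and earlier positions, while non-causal attention (what WavTokenizer needs) lets every token attend to the full sequence. A minimal NumPy sketch of the difference; this is illustrative only, not llama.cpp's actual mask construction:

    import numpy as np

    def build_kq_mask(n_tokens: int, causal: bool) -> np.ndarray:
        # 0.0 where attention is allowed, -inf where it is masked out;
        # the mask is added to the KQ scores before the softmax.
        mask = np.zeros((n_tokens, n_tokens), dtype=np.float32)
        if causal:
            # position i may only attend to positions j <= i
            mask[np.triu_indices(n_tokens, k=1)] = -np.inf
        return mask

    print(build_kq_mask(4, causal=True))   # lower-triangular: decoder-style
    print(build_kq_mask(4, causal=False))  # all zeros: full bidirectional attention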
