Commit b31548a (parent 102dd30)

6 files changed: +32 −6 lines

Makefile (0 additions, 1 deletion)

@@ -479,7 +479,6 @@ OBJS_GGUF_LLAMA = \
 	$(TMP)$(PREFIX)_llama-hparams.o \
 	$(TMP)$(PREFIX)_llama-impl.o \
 	$(TMP)$(PREFIX)_llama-io.o \
-	$(TMP)$(PREFIX)_llama-kv-cache.o \
 	$(TMP)$(PREFIX)_llama-kv-cache-unified.o \
 	$(TMP)$(PREFIX)_llama-kv-cache-unified-iswa.o \
 	$(TMP)$(PREFIX)_llama-kv-cache-recurrent.o \

base_sampling2/master/src/llama-arch.cpp (8 additions, 3 deletions)

@@ -200,7 +200,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_HF_JSON,         "tokenizer.huggingface.json" },
     { LLM_KV_TOKENIZER_RWKV,            "tokenizer.rwkv.world" },
     { LLM_KV_TOKENIZER_CHAT_TEMPLATE,   "tokenizer.chat_template" },
-    { LLM_KV_TOKENIZER_CHAT_TEMPLATE_N, "tokenizer.chat_template.%s" },
     { LLM_KV_TOKENIZER_FIM_PRE_ID,      "tokenizer.ggml.fim_pre_token_id" },
     { LLM_KV_TOKENIZER_FIM_SUF_ID,      "tokenizer.ggml.fim_suf_token_id" },
     { LLM_KV_TOKENIZER_FIM_MID_ID,      "tokenizer.ggml.fim_mid_token_id" },

@@ -1707,8 +1706,14 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
 LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
 
 std::string LLM_KV::operator()(llm_kv kv) const {
-    return suffix ? ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch), suffix)
-                  : ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
+    std::string name = ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
+
+    if (suffix != nullptr) {
+        name += ".";
+        name += suffix;
+    }
+
+    return name;
 }
 
 std::string LLM_TN_IMPL::str() const {
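
Note on the LLM_KV change above: rather than keeping a separate *_N key whose value carries a %s slot, operator() now formats the base key name first and, when a suffix is set, appends it as a dotted component. A minimal standalone sketch of the resulting behavior (the make_kv_key helper and the "tool_use" suffix are illustrative stand-ins, not names from this codebase):

#include <cstdio>
#include <string>

// Illustrative stand-in for LLM_KV::operator() after this change: the base
// key is resolved first, then an optional suffix is appended as ".suffix".
static std::string make_kv_key(const std::string & base, const char * suffix) {
    std::string name = base;
    if (suffix != nullptr) {
        name += ".";
        name += suffix;
    }
    return name;
}

int main() {
    // prints "tokenizer.chat_template"
    std::printf("%s\n", make_kv_key("tokenizer.chat_template", nullptr).c_str());
    // prints "tokenizer.chat_template.tool_use" ("tool_use" is an example name)
    std::printf("%s\n", make_kv_key("tokenizer.chat_template", "tool_use").c_str());
    return 0;
}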

base_sampling2/master/src/llama-arch.h (0 additions, 1 deletion)

@@ -196,7 +196,6 @@ enum llm_kv {
     LLM_KV_TOKENIZER_HF_JSON,
     LLM_KV_TOKENIZER_RWKV,
     LLM_KV_TOKENIZER_CHAT_TEMPLATE,
-    LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
     LLM_KV_TOKENIZER_FIM_PRE_ID,
     LLM_KV_TOKENIZER_FIM_SUF_ID,
     LLM_KV_TOKENIZER_FIM_MID_ID,

base_sampling2/master/src/llama-graph.cpp (22 additions, 0 deletions)

@@ -659,6 +659,28 @@ ggml_tensor * llm_graph_context::build_ffn(
                 cur = ggml_mul(ctx0, x0, x1);
                 cb(cur, "ffn_mul", il);
             } break;
+        case LLM_FFN_GEGLU:
+            {
+                // Split into two equal parts
+                int64_t split_point = cur->ne[0] / 2;
+                ggml_tensor * output_ffn_up = ggml_cont(ctx0, ggml_view_2d(
+                    ctx0, cur, split_point,
+                    cur->ne[1], cur->nb[1], 0
+                ));
+                ggml_tensor * output_ffn_gate = ggml_cont(ctx0, ggml_view_2d(
+                    ctx0, cur, split_point,
+                    cur->ne[1], cur->nb[1],
+                    split_point * ggml_element_size(cur)
+                ));
+
+                // Apply GELU activation function to the first part
+                output_ffn_up = ggml_gelu(ctx0, output_ffn_up);
+                cb(output_ffn_up, "ffn_gelu", il);
+
+                // Element-wise multiplication between the activated part and the gate part
+                cur = ggml_mul(ctx0, output_ffn_up, output_ffn_gate);
+                cb(cur, "ffn_geglu", il);
+            } break;
     }
 
     if (gate && type_gate == LLM_FFN_PAR) {
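
The new branch computes GEGLU over a fused up/gate projection: the first ne[0]/2 columns of the tensor go through GELU and then gate the remaining half elementwise. For intuition, a scalar reference sketch of the same computation on one row (assuming ggml_gelu's tanh-approximation GELU; this helper is illustrative, not part of the codebase):

#include <cassert>
#include <cmath>
#include <vector>

// Reference GEGLU on a plain float row, mirroring the graph code above:
// the fused projection output is split in half, GELU is applied to the
// first half, and the result gates the second half elementwise.
static std::vector<float> geglu_row(const std::vector<float> & fused) {
    assert(fused.size() % 2 == 0);
    const size_t half = fused.size() / 2;   // "split_point" in the graph code
    std::vector<float> out(half);
    for (size_t i = 0; i < half; ++i) {
        const float x = fused[i];           // up part
        const float g = fused[half + i];    // gate part
        // tanh-approximation GELU (the form used by ggml_gelu)
        const float gelu_x = 0.5f * x * (1.0f + std::tanh(
            0.79788456f * (x + 0.044715f * x * x * x)));
        out[i] = gelu_x * g;
    }
    return out;
}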

base_sampling2/master/src/llama-graph.h (1 addition, 0 deletions)

@@ -36,6 +36,7 @@ enum llm_ffn_op_type {
     LLM_FFN_RELU,
     LLM_FFN_RELU_SQR,
     LLM_FFN_SWIGLU,
+    LLM_FFN_GEGLU,
 };
 
 enum llm_ffn_gate_type {

base_sampling2/master/src/llama-model.cpp (1 addition, 1 deletion)

@@ -13788,7 +13788,7 @@ uint64_t llama_model_size(const llama_model * model) {
 }
 
 const char * llama_model_chat_template(const llama_model * model, const char * name) {
-    const auto key = name ? LLM_KV(model->arch, name)(LLM_KV_TOKENIZER_CHAT_TEMPLATE_N)
+    const auto key = name ? LLM_KV(model->arch, name)(LLM_KV_TOKENIZER_CHAT_TEMPLATE)
                           : LLM_KV(model->arch)(LLM_KV_TOKENIZER_CHAT_TEMPLATE);
     const auto & it = model->gguf_kv.find(key);
     if (it == model->gguf_kv.end()) {
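
With LLM_KV_TOKENIZER_CHAT_TEMPLATE_N removed, a named template now resolves through the same base key plus the suffix path added in llama-arch.cpp, so the lookup key becomes "tokenizer.chat_template.<name>". A hedged usage sketch (the model pointer and the "tool_use" name are illustrative):

// Assuming `model` is a loaded llama_model *; the template name is an example.
// nullptr -> looks up "tokenizer.chat_template"
const char * tmpl_default = llama_model_chat_template(model, nullptr);
// "tool_use" -> looks up "tokenizer.chat_template.tool_use"
const char * tmpl_named = llama_model_chat_template(model, "tool_use");
// Either call yields nullptr when the key is absent from the GGUF metadata.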
