Commit 16b73d4

use build_ffn with LLM_FFN_GEGLU

1 parent: 4fc4bf6

File tree: 1 file changed (+6, -24 lines)


src/llama-model.cpp

Lines changed: 6 additions & 24 deletions
@@ -6214,30 +6214,12 @@ struct llm_build_modern_bert : public llm_graph_context {
             ggml_tensor * ffn_inp = cur;
             cb(ffn_inp, "ffn_inp", il);

-            // feed-forward network
-            ggml_tensor * ffn_up = build_lora_mm(model.layers[il].ffn_up, cur);
-            cb(ffn_up, "ffn_up", il);
-
-            int64_t split_point = ffn_up->ne[0] / 2;
-            ggml_tensor * output_ffn_up = ggml_cont(ctx0, ggml_view_2d(
-                ctx0, ffn_up, split_point,
-                ffn_up->ne[1], ffn_up->nb[1], 0
-            ));
-            ggml_tensor * output_ffn_gate = ggml_cont(ctx0, ggml_view_2d(
-                ctx0, ffn_up, split_point,
-                ffn_up->ne[1], ffn_up->nb[1],
-                split_point * ggml_element_size(ffn_up)
-            ));
-
-            // Apply activation function
-            output_ffn_up = ggml_gelu(ctx0, output_ffn_up);
-
-            // Element-wise multiplication
-            ggml_tensor * gated = ggml_mul(ctx0, output_ffn_up, output_ffn_gate);
-            cb(gated, "ffn_gated", il);
-
-            // Final projection
-            cur = build_lora_mm(model.layers[il].ffn_down, gated);
+            cur = build_ffn(cur,
+                    model.layers[il].ffn_up,
+                    NULL, NULL, NULL, NULL, NULL,
+                    model.layers[il].ffn_down,
+                    NULL, NULL, NULL,
+                    LLM_FFN_GEGLU, LLM_FFN_SEQ, il);

             // attentions bypass the intermediate layer
             cur = ggml_add(ctx0, cur, ffn_inp);
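For context on the change: the removed code and the new build_ffn(..., LLM_FFN_GEGLU, LLM_FFN_SEQ, il) call are meant to compute the same GEGLU feed-forward, splitting the fused up-projection in half along its first dimension, applying GELU to one half, and multiplying it elementwise with the other half before the down-projection. The standalone sketch below reproduces that split / gelu / mul pattern with the public ggml API, mirroring the removed hand-rolled version; the context size, tensor shapes, and variable names are illustrative assumptions, not taken from this commit.

// Standalone GEGLU sketch with plain ggml (illustrative shapes, not from this commit).
#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // stand-in for the fused up-projection output: width 32 (two halves of 16), 8 tokens
    struct ggml_tensor * ffn_up = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 32, 8);

    const int64_t split_point = ffn_up->ne[0] / 2;

    // first half = value path, second half = gate path (same views as the removed code)
    struct ggml_tensor * x = ggml_cont(ctx, ggml_view_2d(
        ctx, ffn_up, split_point, ffn_up->ne[1], ffn_up->nb[1], 0));
    struct ggml_tensor * g = ggml_cont(ctx, ggml_view_2d(
        ctx, ffn_up, split_point, ffn_up->ne[1], ffn_up->nb[1],
        split_point * ggml_element_size(ffn_up)));

    // GEGLU: gelu(x) * g, elementwise; this node is what the down-projection would consume
    struct ggml_tensor * gated = ggml_mul(ctx, ggml_gelu(ctx, x), g);

    printf("gated: %lld x %lld\n", (long long) gated->ne[0], (long long) gated->ne[1]);

    ggml_free(ctx);
    return 0;
}

The helper keeps the graph identical in spirit but avoids the manual views and makes the FFN consistent with the other architectures that already go through build_ffn.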
