@@ -618,6 +618,28 @@ ggml_tensor * llm_graph_context::build_ffn(
618
618
cur = ggml_reglu (ctx0, cur);
619
619
cb (cur, " ffn_reglu" , il);
620
620
} break ;
621
+ case LLM_FFN_GEGLU:
622
+ {
623
+ // Split into two equal parts
624
+ int64_t split_point = cur->ne [0 ] / 2 ;
625
+ ggml_tensor * output_ffn_up = ggml_cont (ctx0, ggml_view_2d (
626
+ ctx0, cur, split_point,
627
+ cur->ne [1 ], cur->nb [1 ], 0
628
+ ));
629
+ ggml_tensor * output_ffn_gate = ggml_cont (ctx0, ggml_view_2d (
630
+ ctx0, cur, split_point,
631
+ cur->ne [1 ], cur->nb [1 ],
632
+ split_point * ggml_element_size (cur)
633
+ ));
634
+
635
+ // Apply GELU activation function to the first part
636
+ output_ffn_up = ggml_gelu (ctx0, output_ffn_up);
637
+ cb (output_ffn_up, " ffn_gelu" , il);
638
+
639
+ // Element-wise multiplication between the activated part and the gate part
640
+ cur = ggml_mul (ctx0, output_ffn_up, output_ffn_gate);
641
+ cb (cur, " ffn_geglu" , il);
642
+ } break ;
621
643
}
622
644
623
645
if (gate && type_gate == LLM_FFN_PAR) {