Commit 8f3af99

refactor: Rename llm_build_hybrid_mamba -> llm_build_granite_hybrid
I've gone back and forth a lot about how/if to try to implement reuse of the "child model" layer types for hybrid models. At the end of the day, I think hybrid models are their own beast and, even if their layers are inspired by other models, they should maintain control of their own layer building (in other words, the copy-paste method). Given that, the name should reflect that this is not a generic hybrid model builder, but rather a granite-specific hybrid model builder that can do MoE (granite 4) or dense (bamba).

As part of this, I also cleaned up dangling comments from previous attempts at using static methods for reusability.

Branch: GraniteFour

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent faba0c3 commit 8f3af99
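For illustration, here is a minimal, self-contained C++ sketch of the pattern the commit message describes: a granite-specific hybrid builder that owns its own layer-building methods (the "copy-paste method") and takes a use_rope flag to cover both the MoE (granite 4) and dense (bamba) variants. All types and names below are hypothetical stand-ins, not the real llama.cpp API; the actual changes are in the diff further down.

```cpp
// Hypothetical stand-in types -- the real builder works on ggml graphs and
// llama.cpp hyperparameters; this only models the ownership/dispatch shape.
#include <cstdio>
#include <memory>

enum class arch_sketch { granite_moe_hybrid, bamba };

struct granite_hybrid_builder_sketch {
    const bool use_rope;

    explicit granite_hybrid_builder_sketch(bool use_rope) : use_rope(use_rope) {}

    // The hybrid builder keeps its own copies of the layer builders instead of
    // borrowing static helpers from the parent granite model.
    void build_attention_layer(int il) const {
        std::printf("layer %d: attention (rope = %s)\n", il, use_rope ? "yes" : "no");
    }

    void build_ffn_layer(int il) const {
        std::printf("layer %d: ffn\n", il);
    }
};

// Mirrors the dispatch in build_graph() in the diff below: both architectures
// construct the same granite-specific hybrid builder, differing only in the
// rope flag.
static std::unique_ptr<granite_hybrid_builder_sketch> make_builder(arch_sketch arch) {
    switch (arch) {
        case arch_sketch::granite_moe_hybrid:
            return std::make_unique<granite_hybrid_builder_sketch>(/* use_rope */ false);
        case arch_sketch::bamba:
            return std::make_unique<granite_hybrid_builder_sketch>(/* use_rope */ true);
    }
    return nullptr;
}

int main() {
    const auto builder = make_builder(arch_sketch::bamba);
    builder->build_attention_layer(0);
    builder->build_ffn_layer(0);
    return 0;
}
```

In the sketch, as in the diff, the per-architecture difference is confined to the use_rope constructor argument; everything else about layer construction stays inside the one granite hybrid builder.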

File tree

1 file changed: +11 / -16 lines changed

src/llama-model.cpp

Lines changed: 11 additions & 16 deletions
@@ -13053,8 +13053,6 @@ struct llm_build_granite : public llm_graph_context {
         ggml_build_forward_expand(gf, cur);
     }
 
-    // static layer build function that enables other models to borrow this
-    // layer logic
     ggml_tensor * build_granite_attention_layer(
         ggml_cgraph * gf,
         ggml_tensor * cur,
@@ -13118,7 +13116,6 @@ struct llm_build_granite : public llm_graph_context {
         return cur;
     }
 
-    // static ffn layer builder for reuse in hybrid architectures
     ggml_tensor * build_layer_ffn(
         ggml_tensor * cur,
         ggml_tensor * inpSA,
@@ -13199,16 +13196,17 @@ struct llm_build_granite : public llm_graph_context {
     }
 };
 
-struct llm_build_hybrid_mamba : public llm_graph_context {
+struct llm_build_granite_hybrid : public llm_graph_context {
 
     const llama_model & model;
 
-    llm_build_hybrid_mamba(
-        const llama_model & model,
-        const llm_graph_params & params,
-        ggml_cgraph * gf,
-        const bool use_rope = true)
-        : llm_graph_context(params), model(model) {
+    llm_build_granite_hybrid(
+        const llama_model & model,
+        const llm_graph_params & params,
+        ggml_cgraph * gf,
+        const bool use_rope = true) :
+        llm_graph_context(params), model(model) {
+
         const int64_t n_embd_head = hparams.n_embd_head_v;
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
 
@@ -13285,7 +13283,7 @@ struct llm_build_hybrid_mamba : public llm_graph_context {
         ggml_cgraph * gf,
         ggml_tensor * cur,
         const llama_ubatch & ubatch,
-          int il) const {
+        int il) const {
         const auto * kv_state = static_cast<const llama_memory_hybrid_state *>(mstate)->get_state_recr();
 
         const auto kv_head = kv_state->get_head();
@@ -13408,8 +13406,6 @@ struct llm_build_hybrid_mamba : public llm_graph_context {
         return cur;
     }
 
-    // static layer build function that enables other models to borrow this
-    // layer logic
     ggml_tensor * build_granite_attention_layer(
         ggml_cgraph * gf,
         ggml_tensor * cur,
@@ -13473,7 +13469,6 @@ struct llm_build_hybrid_mamba : public llm_graph_context {
         return cur;
     }
 
-    // static ffn layer builder for reuse in hybrid architectures
     ggml_tensor * build_layer_ffn(
         ggml_tensor * cur,
         ggml_tensor * inpSA,
@@ -14843,12 +14838,12 @@ llm_graph_result_ptr llama_model::build_graph(
             } break;
         case LLM_ARCH_GRANITE_MOE_HYBRID:
             {
-                llm = std::make_unique<llm_build_hybrid_mamba>(*this, params, gf,
+                llm = std::make_unique<llm_build_granite_hybrid>(*this, params, gf,
                     /* use_rope */ false);
             } break;
         case LLM_ARCH_BAMBA:
             {
-                llm = std::make_unique<llm_build_hybrid_mamba>(*this, params, gf,
+                llm = std::make_unique<llm_build_granite_hybrid>(*this, params, gf,
                     /* use_rope */ true);
             } break;
         case LLM_ARCH_CHAMELEON:
