
Commit a9dcc84

refactor: Rename llm_build_hybrid_mamba -> llm_build_granite_hybrid
I've gone back and forth a lot about how (and if) to try to implement reuse of the "child model" layer types for hybrid models. At the end of the day, I think hybrid models are their own beast: even if their layers are inspired by other models, they should maintain control of their own layer building (in other words, the copy-paste method). Given that, the name should reflect that this is not a generic hybrid model builder, but rather a granite-specific hybrid model builder that can do MoE (granite 4) or dense (bamba). As part of this, I also cleaned up dangling comments from previous attempts at using static methods for reusability.

Branch: GraniteFour

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent 40e2346 commit a9dcc84
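
For readers skimming the diff below: the design the commit message describes comes down to one granite-specific hybrid builder that both architectures construct, with per-architecture behavior selected by configuration (here, the use_rope flag) rather than by reusing the child models' layer-build helpers. The following is a minimal, self-contained sketch of that dispatch pattern, not the real implementation; only llm_build_granite_hybrid, LLM_ARCH_GRANITE_MOE_HYBRID, LLM_ARCH_BAMBA, and use_rope appear in the actual diff, while Arch, GraniteHybridBuilder, and make_builder are hypothetical stand-ins.

    // Toy, self-contained illustration of the pattern described above -- not llama.cpp code.
    // Only llm_build_granite_hybrid, LLM_ARCH_GRANITE_MOE_HYBRID, LLM_ARCH_BAMBA and the
    // use_rope flag come from the actual diff; Arch, GraniteHybridBuilder and make_builder
    // are hypothetical stand-ins.
    #include <cstdio>
    #include <memory>

    enum class Arch { GRANITE_MOE_HYBRID, BAMBA };

    // One granite-specific hybrid builder serves both variants; per-architecture
    // differences (MoE vs. dense FFN, RoPE on or off) are handled inside this one
    // class instead of borrowing layer helpers from the "child" models.
    struct GraniteHybridBuilder {
        bool use_rope;
        explicit GraniteHybridBuilder(bool use_rope) : use_rope(use_rope) {}
        void build() const {
            std::printf("granite hybrid graph, rope=%s\n", use_rope ? "on" : "off");
        }
    };

    static std::unique_ptr<GraniteHybridBuilder> make_builder(Arch arch) {
        switch (arch) {
            case Arch::GRANITE_MOE_HYBRID:
                // granite 4 (MoE): mirrors the /* use_rope */ false call in the diff
                return std::make_unique<GraniteHybridBuilder>(false);
            case Arch::BAMBA:
                // bamba (dense): mirrors the /* use_rope */ true call in the diff
                return std::make_unique<GraniteHybridBuilder>(true);
        }
        return nullptr;
    }

    int main() {
        make_builder(Arch::GRANITE_MOE_HYBRID)->build();
        make_builder(Arch::BAMBA)->build();
    }

Compiled standalone, this prints one line per architecture, which is enough to see that the MoE (granite 4) and dense (bamba) paths differ only in how the single builder is configured.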

src/llama-model.cpp

Lines changed: 10 additions & 15 deletions
@@ -13069,8 +13069,6 @@ struct llm_build_granite : public llm_graph_context {
         ggml_build_forward_expand(gf, cur);
     }
 
-    // static layer build function that enables other models to borrow this
-    // layer logic
     ggml_tensor * build_granite_attention_layer(
             ggml_cgraph * gf,
             ggml_tensor * cur,
@@ -13134,7 +13132,6 @@ struct llm_build_granite : public llm_graph_context {
         return cur;
     }
 
-    // static ffn layer builder for reuse in hybrid architectures
     ggml_tensor * build_layer_ffn(
             ggml_tensor * cur,
             ggml_tensor * inpSA,
@@ -13215,16 +13212,17 @@ struct llm_build_granite : public llm_graph_context {
     }
 };
 
-struct llm_build_hybrid_mamba : public llm_graph_context {
+struct llm_build_granite_hybrid : public llm_graph_context {
 
     const llama_model & model;
 
-    llm_build_hybrid_mamba(
-        const llama_model & model,
-        const llm_graph_params & params,
-        ggml_cgraph * gf,
-        const bool use_rope = true)
-        : llm_graph_context(params), model(model) {
+    llm_build_granite_hybrid(
+        const llama_model & model,
+        const llm_graph_params & params,
+        ggml_cgraph * gf,
+        const bool use_rope = true) :
+        llm_graph_context(params), model(model) {
+
         const int64_t n_embd_head = hparams.n_embd_head_v;
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
 
@@ -13424,8 +13422,6 @@ struct llm_build_hybrid_mamba : public llm_graph_context {
         return cur;
     }
 
-    // static layer build function that enables other models to borrow this
-    // layer logic
     ggml_tensor * build_granite_attention_layer(
             ggml_cgraph * gf,
             ggml_tensor * cur,
@@ -13489,7 +13485,6 @@ struct llm_build_hybrid_mamba : public llm_graph_context {
         return cur;
     }
 
-    // static ffn layer builder for reuse in hybrid architectures
     ggml_tensor * build_layer_ffn(
             ggml_tensor * cur,
             ggml_tensor * inpSA,
@@ -14859,12 +14854,12 @@ llm_graph_result_ptr llama_model::build_graph(
             } break;
         case LLM_ARCH_GRANITE_MOE_HYBRID:
             {
-                llm = std::make_unique<llm_build_hybrid_mamba>(*this, params, gf,
+                llm = std::make_unique<llm_build_granite_hybrid>(*this, params, gf,
                     /* use_rope */ false);
             } break;
         case LLM_ARCH_BAMBA:
             {
-                llm = std::make_unique<llm_build_hybrid_mamba>(*this, params, gf,
+                llm = std::make_unique<llm_build_granite_hybrid>(*this, params, gf,
                     /* use_rope */ true);
             } break;
         case LLM_ARCH_CHAMELEON:
