@@ -9904,7 +9904,9 @@ struct llm_build_mamba : public llm_graph_context {
         // {n_embd, n_tokens}
         inpL = build_inp_embd(model.tok_embd);
 
-        auto * inp = build_rs_inp();
+        auto * rs_inp = build_rs_inp();
+
+        ggml_tensor * inp_out_ids = build_inp_out_ids();
 
         for (int il = 0; il < n_layer; ++il) {
             // norm
@@ -9914,14 +9916,12 @@ struct llm_build_mamba : public llm_graph_context {
             cb(cur, "attn_norm", il);
 
             if (model.arch == LLM_ARCH_MAMBA2) {
-                cur = build_mamba2_layer(inp , gf, cur, model, ubatch, il);
+                cur = build_mamba2_layer(rs_inp , gf, cur, model, ubatch, il);
             } else {
-                cur = build_mamba_layer(inp , gf, cur, model, ubatch, il);
+                cur = build_mamba_layer(rs_inp , gf, cur, model, ubatch, il);
             }
 
             if (il == n_layer - 1) {
-                // skip computing output for unused tokens
-                ggml_tensor * inp_out_ids = build_inp_out_ids();
                 cur = ggml_get_rows(ctx0, cur, inp_out_ids);
                 inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
             }
@@ -13550,7 +13550,6 @@ struct llm_build_arwkv7 : public llm_build_rwkv7_base {
 };
 
 struct llm_build_granite : public llm_graph_context {
-
     llm_build_granite(
         const llama_model & model,
         const llm_graph_params & params,