Commit bb590f2

fix: Conform to standard way of initializing inp_out_ids

Branch: GraniteFour
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent b6d772f

1 file changed: 4 additions, 4 deletions


src/llama-model.cpp

Lines changed: 4 additions & 4 deletions
@@ -9921,7 +9921,7 @@ struct llm_build_mamba : public llm_graph_context {
                 cur = build_mamba_layer(rs_inp, gf, cur, model, ubatch, il);
             }
 
-            if (il == n_layer - 1) {
+            if (il == n_layer - 1 && inp_out_ids) {
                 cur  = ggml_get_rows(ctx0, cur,  inp_out_ids);
                 inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
             }
@@ -13785,6 +13785,8 @@ struct llm_build_granite_hybrid : public llm_graph_context {
 
         auto * inp = build_inp_mem_hybrid();
 
+        ggml_tensor * inp_out_ids = build_inp_out_ids();
+
         // Positional embeddings populated if rope enabled
         ggml_tensor * inp_pos = nullptr;
         if (use_rope) {
@@ -13810,9 +13812,7 @@ struct llm_build_granite_hybrid : public llm_graph_context {
                     n_embd_head, use_rope, il);
             }
 
-            if (il == n_layer - 1) {
-                // skip computing output for unused tokens
-                ggml_tensor * inp_out_ids = build_inp_out_ids();
+            if (il == n_layer - 1 && inp_out_ids) {
                 cur   = ggml_get_rows(ctx0, cur,   inp_out_ids);
                 inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
             }
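
The change follows the pattern used by the other graph builders in src/llama-model.cpp: build_inp_out_ids() is called once, before the per-layer loop, and the last layer gathers only the rows for tokens whose outputs are actually needed, guarded by a null check on inp_out_ids. Below is a minimal, self-contained C++ sketch of that control flow. The names Tensor and gather_rows are hypothetical stand-ins for illustration only; the real code uses ggml_tensor, build_inp_out_ids(), and ggml_get_rows(ctx0, ...).

#include <cstdio>
#include <optional>
#include <vector>

// Hypothetical stand-in for a ggml tensor; here just a flat row-per-token vector.
using Tensor = std::vector<float>;

// Stand-in for ggml_get_rows(): keep only the rows listed in `ids`.
static Tensor gather_rows(const Tensor & t, const std::vector<int> & ids) {
    Tensor out;
    out.reserve(ids.size());
    for (int i : ids) {
        out.push_back(t[i]);
    }
    return out;
}

int main() {
    const int n_layer  = 4;
    const int n_tokens = 8;

    Tensor cur(n_tokens, 0.0f);

    // Analogue of build_inp_out_ids(): built once, before the layer loop.
    // It can be empty when outputs are needed for every token.
    std::optional<std::vector<int>> inp_out_ids = std::vector<int>{2, 5, 7};

    for (int il = 0; il < n_layer; ++il) {
        // ... per-layer computation would go here ...
        for (auto & v : cur) {
            v += 1.0f;
        }

        // Analogue of: if (il == n_layer - 1 && inp_out_ids) { ggml_get_rows(...); }
        // Only the last layer trims the result down to the requested output rows.
        if (il == n_layer - 1 && inp_out_ids) {
            cur = gather_rows(cur, *inp_out_ids);
        }
    }

    std::printf("rows kept after last layer: %zu\n", cur.size());
    return 0;
}

Hoisting the output-ids input out of the loop keeps llm_build_granite_hybrid consistent with the other builders and avoids constructing the same input tensor inside the last-layer branch.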
