@@ -9921,7 +9921,7 @@ struct llm_build_mamba : public llm_graph_context {
                 cur = build_mamba_layer(rs_inp, gf, cur, model, ubatch, il);
             }
 
-            if (il == n_layer - 1) {
+            if (il == n_layer - 1 && inp_out_ids) {
                 cur = ggml_get_rows(ctx0, cur, inp_out_ids);
                 inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
             }
@@ -13785,6 +13785,8 @@ struct llm_build_granite_hybrid : public llm_graph_context {
 
         auto * inp = build_inp_mem_hybrid();
 
+        ggml_tensor * inp_out_ids = build_inp_out_ids();
+
         // Positional embeddings populated if rope enabled
         ggml_tensor * inp_pos = nullptr;
         if (use_rope) {
@@ -13810,9 +13812,7 @@ struct llm_build_granite_hybrid : public llm_graph_context {
                     n_embd_head, use_rope, il);
             }
 
-            if (il == n_layer - 1) {
-                // skip computing output for unused tokens
-                ggml_tensor * inp_out_ids = build_inp_out_ids();
+            if (il == n_layer - 1 && inp_out_ids) {
                 cur = ggml_get_rows(ctx0, cur, inp_out_ids);
                 inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
             }
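Both hunks apply the same pattern: build_inp_out_ids() is called once, up front, when the graph inputs are created, instead of inside the last-layer branch, and the ggml_get_rows gather is guarded on inp_out_ids being non-null so it is skipped when no output rows were requested. The standalone C++ sketch below only illustrates that control flow under stated assumptions; Tensor and get_rows are hypothetical stand-ins for this example, not the real ggml/llama.cpp API.

// Minimal sketch of the pattern: build the output-row selector once before the
// layer loop, and gather rows on the last layer only when a selector exists.
#include <cstdio>
#include <vector>

// Hypothetical stand-in for ggml_tensor: one value per "token" keeps it small.
struct Tensor {
    std::vector<float> rows;
};

// Hypothetical stand-in for ggml_get_rows: gather the selected rows of t.
static Tensor get_rows(const Tensor & t, const std::vector<int> & ids) {
    Tensor out;
    for (int id : ids) {
        out.rows.push_back(t.rows[id]);
    }
    return out;
}

int main() {
    const int n_layer  = 4;
    const int n_tokens = 8;

    // Built once per graph; empty plays the role of a null inp_out_ids,
    // i.e. "no output-row selection requested".
    std::vector<int> inp_out_ids = {7};  // e.g. only the last token's output is needed

    Tensor cur;
    cur.rows.assign(n_tokens, 0.0f);

    for (int il = 0; il < n_layer; ++il) {
        // ... per-layer computation would go here ...
        for (float & v : cur.rows) {
            v += 1.0f;
        }

        // Skip computing output for unused tokens, but only when a selector was provided.
        if (il == n_layer - 1 && !inp_out_ids.empty()) {
            cur = get_rows(cur, inp_out_ids);
        }
    }

    printf("output rows: %zu\n", cur.rows.size());  // prints 1
    return 0;
}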