Skip to content

Commit a9d5096

Browse files
committed
graph : remove model reference from build_pooling
ggml-ci
1 parent 7b51bed commit a9d5096

File tree

3 files changed

+19
-9
lines changed

3 files changed

+19
-9
lines changed

src/llama-graph.cpp

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1992,7 +1992,12 @@ ggml_tensor * llm_graph_context::build_rwkv_channel_mix(
19921992
return cur;
19931993
}
19941994

1995-
void llm_graph_context::build_pooling(ggml_cgraph * gf) const {
1995+
void llm_graph_context::build_pooling(
1996+
ggml_cgraph * gf,
1997+
ggml_tensor * cls,
1998+
ggml_tensor * cls_b,
1999+
ggml_tensor * cls_out,
2000+
ggml_tensor * cls_out_b) const {
19962001
if (!cparams.embeddings) {
19972002
return;
19982003
}
@@ -2036,18 +2041,18 @@ void llm_graph_context::build_pooling(ggml_cgraph * gf) const {
20362041

20372042
// classification head
20382043
// https://github.com/huggingface/transformers/blob/5af7d41e49bbfc8319f462eb45253dcb3863dfb7/src/transformers/models/roberta/modeling_roberta.py#L1566
2039-
GGML_ASSERT(model.cls != nullptr);
2040-
GGML_ASSERT(model.cls_b != nullptr);
2044+
GGML_ASSERT(cls != nullptr);
2045+
GGML_ASSERT(cls_b != nullptr);
20412046

2042-
cur = ggml_add (ctx0, ggml_mul_mat(ctx0, model.cls, inp), model.cls_b);
2047+
cur = ggml_add (ctx0, ggml_mul_mat(ctx0, cls, inp), cls_b);
20432048
cur = ggml_tanh(ctx0, cur);
20442049

20452050
// some models don't have `cls_out`, for example: https://huggingface.co/jinaai/jina-reranker-v1-tiny-en
20462051
// https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/blob/cb5347e43979c3084a890e3f99491952603ae1b7/modeling_bert.py#L884-L896
2047-
if (model.cls_out) {
2048-
GGML_ASSERT(model.cls_out_b != nullptr);
2052+
if (cls_out) {
2053+
GGML_ASSERT(cls_out_b != nullptr);
20492054

2050-
cur = ggml_add (ctx0, ggml_mul_mat(ctx0, model.cls_out, cur), model.cls_out_b);
2055+
cur = ggml_add (ctx0, ggml_mul_mat(ctx0, cls_out, cur), cls_out_b);
20512056
}
20522057
} break;
20532058
default:

src/llama-graph.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -617,5 +617,10 @@ struct llm_graph_context {
617617
// pooling
618618
//
619619

620-
void build_pooling(ggml_cgraph * gf) const;
620+
void build_pooling(
621+
ggml_cgraph * gf,
622+
ggml_tensor * cls,
623+
ggml_tensor * cls_b,
624+
ggml_tensor * cls_out,
625+
ggml_tensor * cls_out_b) const;
621626
};

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10583,7 +10583,7 @@ llm_graph_result_ptr llama_model::build_graph(
1058310583
}
1058410584

1058510585
// add on pooling layer
10586-
llm->build_pooling(gf);
10586+
llm->build_pooling(gf, cls, cls_b, cls_out, cls_out_b);
1058710587

1058810588
return std::move(llm->res);
1058910589
}

0 commit comments

Comments (0)