Commit 2b36420

Merge remote-tracking branch 'origin/master' into GraniteFour
* origin/master: llama : remove llm_graph_input_one (ggml-org#14603)

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

2 parents 8dd7f97 + cb9178f

File tree

3 files changed: +1, -28 lines


src/llama-graph.cpp

Lines changed: 0 additions & 6 deletions
@@ -340,12 +340,6 @@ void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) {
     inp_rs->set_input(ubatch);
 }
 
-void llm_graph_input_one::set_input(const llama_ubatch *) {
-    GGML_ASSERT(one && ggml_nelements(one) == 1);
-    float f_one = 1.0f;
-    ggml_backend_tensor_set(one, &f_one, 0, sizeof(float));
-}
-
 //
 // llm_graph_context
 //

src/llama-graph.h

Lines changed: 0 additions & 11 deletions
@@ -341,17 +341,6 @@ class llm_graph_input_mem_hybrid : public llm_graph_input_i {
     const llama_memory_hybrid_context * mctx;
 };
 
-// TODO: remove this when ggml_scale_add is implemented
-class llm_graph_input_one : public llm_graph_input_i {
-public:
-    llm_graph_input_one() {}
-    virtual ~llm_graph_input_one() = default;
-
-    void set_input(const llama_ubatch * ubatch) override;
-
-    ggml_tensor * one = nullptr; // F32
-};
-
 //
 // llm_graph_result
 //

src/llama-model.cpp

Lines changed: 1 addition & 11 deletions
@@ -9636,8 +9636,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
     const int n_layer_sparsity = 10; // number of layers using activation sparsity
     const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)
 
-    ggml_tensor * one; // containing single element 1.0f
-
     llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf)
         : llm_graph_context(params),
           model(model),
@@ -9649,14 +9647,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
         ggml_tensor * cur;
         ggml_tensor * inpL;
 
-        // TODO: remove this when ggml_scale_add is implemented
-        one = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-        {
-            auto inp = std::make_unique<llm_graph_input_one>();
-            inp->one = one;
-            res->add_input(std::move(inp));
-        }
-
         inpL = build_inp_embd(model.tok_embd);
 
         // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
@@ -10046,7 +10036,7 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
             cb(innovation, "innovation", il);
 
             ggml_tensor * all_coefs = build_lora_mm(model.layers[il].altup_correct_coef, modalities); // [n_altup, n_tokens]
-            all_coefs = ggml_add(ctx0, all_coefs, one);
+            all_coefs = ggml_scale_bias(ctx0, all_coefs, 1.0f, 1.0f); // + 1.0
             cb(all_coefs, "all_coefs", il);
             all_coefs = ggml_cont(ctx0, ggml_transpose(ctx0, all_coefs)); // [n_tokens, n_altup]
             all_coefs = ggml_reshape_3d(ctx0, all_coefs, 1, n_tokens, n_altup); // [1, n_tokens, n_altup]
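For context, a minimal sketch of why the one-element input tensor is no longer needed. It assumes a ggml context `ctx0` and an F32 tensor `all_coefs`, as in the constructor code shown in the diff above; this is an illustration of the substitution, not additional code from the commit.

    // Sketch only: `ctx0` (ggml_context *) and `all_coefs` (F32 tensor) come
    // from the surrounding graph-building code in llm_build_gemma3n_iswa.

    // Before: the constant 1.0f had to live in a dedicated 1-element graph
    // input (llm_graph_input_one), filled at set_input() time:
    //   one = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
    //   all_coefs = ggml_add(ctx0, all_coefs, one);

    // After: ggml_scale_bias(ctx, a, s, b) computes a*s + b element-wise, so
    // the "+ 1.0" is baked into the graph with no extra input tensor:
    all_coefs = ggml_scale_bias(ctx0, all_coefs, 1.0f, 1.0f);

With the single use of the constant gone, the `llm_graph_input_one` helper and its `set_input` implementation removed in the other two files have no remaining callers.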

0 commit comments

Comments
 (0)