llama : remove llm_graph_input_one #14603


Merged (1 commit) on Jul 9, 2025

7 changes: 0 additions & 7 deletions src/llama-graph.cpp
@@ -354,13 +354,6 @@ void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) {
 }
 }

-void llm_graph_input_one::set_input(const llama_ubatch * ubatch) {
-    GGML_UNUSED(ubatch);
-    GGML_ASSERT(one && ggml_nelements(one) == 1);
-    float f_one = 1.0f;
-    ggml_backend_tensor_set(one, &f_one, 0, sizeof(float));
-}
-
 //
 // llm_graph_context
 //
11 changes: 0 additions & 11 deletions src/llama-graph.h
@@ -352,17 +352,6 @@ class llm_graph_input_mem_hybrid : public llm_graph_input_i {
     const llama_memory_hybrid_context * mctx;
 };

-// TODO: remove this when ggml_scale_add is implemented
-class llm_graph_input_one : public llm_graph_input_i {
-public:
-    llm_graph_input_one() {}
-    virtual ~llm_graph_input_one() = default;
-
-    void set_input(const llama_ubatch * ubatch) override;
-
-    ggml_tensor * one = nullptr; // F32
-};
-
 //
 // llm_graph_result
 //
12 changes: 1 addition & 11 deletions src/llama-model.cpp
@@ -9382,8 +9382,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
     const int n_layer_sparsity = 10; // number of layers using activation sparsity
     const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)

-    ggml_tensor * one; // containing single element 1.0f
-
     llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf)
         : llm_graph_context(params),
           model(model),
@@ -9395,14 +9393,6 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
         ggml_tensor * cur;
         ggml_tensor * inpL;

-        // TODO: remove this when ggml_scale_add is implemented
-        one = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-        {
-            auto inp = std::make_unique<llm_graph_input_one>();
-            inp->one = one;
-            res->add_input(std::move(inp));
-        }
-
         inpL = build_inp_embd(model.tok_embd);

         // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
@@ -9792,7 +9782,7 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
         cb(innovation, "innovation", il);

         ggml_tensor * all_coefs = build_lora_mm(model.layers[il].altup_correct_coef, modalities); // [n_altup, n_tokens]
-        all_coefs = ggml_add(ctx0, all_coefs, one);
+        all_coefs = ggml_scale_bias(ctx0, all_coefs, 1.0f, 1.0f); // + 1.0
         cb(all_coefs, "all_coefs", il);
         all_coefs = ggml_cont(ctx0, ggml_transpose(ctx0, all_coefs)); // [n_tokens, n_altup]
         all_coefs = ggml_reshape_3d(ctx0, all_coefs, 1, n_tokens, n_altup); // [1, n_tokens, n_altup]
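
For reference, ggml_scale_bias(ctx, x, s, b) computes x*s + b elementwise, so with s = 1.0f and b = 1.0f it adds the constant 1.0 inside the op itself. The graph therefore no longer needs the one-element tensor that llm_graph_input_one used to fill with 1.0f at set_input time, which is why the class and its input registration can be deleted. Below is a minimal standalone sketch of that equivalence; it is not part of this PR, assumes a CPU-only ggml build recent enough to have ggml_scale_bias (and ggml-cpu.h for ggml_graph_compute_with_ctx), and uses an illustrative 4-element tensor rather than the real [n_altup, n_tokens] coefficients.

// sketch: constant add via an extra input tensor vs. folding it into ggml_scale_bias
#include "ggml.h"
#include "ggml-cpu.h"
#include <cstdio>

int main() {
    ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    ggml_context * ctx0 = ggml_init(params);

    // stand-in for the coefficient tensor (illustrative values)
    ggml_tensor * coefs = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 4);
    for (int i = 0; i < 4; ++i) {
        ((float *) coefs->data)[i] = 0.25f*i;
    }

    // old way: broadcast-add a 1-element tensor that had to be provided as a graph input
    ggml_tensor * one = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
    ((float *) one->data)[0] = 1.0f;
    ggml_tensor * via_add = ggml_add(ctx0, coefs, one);

    // new way: fold the constant into the op itself (x*1.0f + 1.0f), no extra input needed
    ggml_tensor * via_scale_bias = ggml_scale_bias(ctx0, coefs, 1.0f, 1.0f);

    ggml_cgraph * gf = ggml_new_graph(ctx0);
    ggml_build_forward_expand(gf, via_add);
    ggml_build_forward_expand(gf, via_scale_bias);
    ggml_graph_compute_with_ctx(ctx0, gf, /*n_threads =*/ 1);

    // both paths should print identical values
    for (int i = 0; i < 4; ++i) {
        printf("add: %.2f  scale_bias: %.2f\n",
            ((float *) via_add->data)[i], ((float *) via_scale_bias->data)[i]);
    }

    ggml_free(ctx0);
    return 0;
}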