
Commit bd44d6a

ngxson authored and Minh141120 committed

model : gemma3n text-only (ggml-org#14400)

* gemma3n

* add llm_graph_input_one

1 parent 5deba9e · commit bd44d6a

File tree

convert_hf_to_gguf.py
src/llama-graph.cpp
src/llama-graph.h

3 files changed: +27 -7 lines changed


convert_hf_to_gguf.py

Lines changed: 5 additions & 5 deletions
@@ -936,11 +936,7 @@ def _create_vocab_sentencepiece(self):
         scores: list[float] = [-10000.0] * vocab_size
         toktypes: list[int] = [SentencePieceTokenTypes.UNUSED] * vocab_size
 
-        for token_id in range(tokenizer.vocab_size()):
-            if token_id >= vocab_size:
-                logger.warning(f'ignore tokens from {token_id}: id is out of range, max={vocab_size - 1}')
-                break
-
+        for token_id in range(vocab_size):
             piece = tokenizer.IdToPiece(token_id)
             text = piece.encode("utf-8")
             score = tokenizer.GetScore(token_id)
@@ -955,6 +951,10 @@ def _create_vocab_sentencepiece(self):
             elif tokenizer.IsByte(token_id):
                 toktype = SentencePieceTokenTypes.BYTE
 
+            if token_id >= vocab_size:
+                logger.warning(f'ignore tokens from {token_id}: id is out of range, max={vocab_size - 1}')
+                break
+
             tokens[token_id] = text
             scores[token_id] = score
             toktypes[token_id] = toktype
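Note on this hunk pair: the out-of-range guard now runs after the token's type has been classified instead of at the top of the loop, and the loop bound changes from tokenizer.vocab_size() to the model-side vocab_size. Any slot the loop leaves untouched keeps the placeholder score of -10000.0 and the UNUSED type that the arrays are pre-initialized with, as shown in the context lines above.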

src/llama-graph.cpp

Lines changed: 11 additions & 2 deletions
@@ -354,6 +354,12 @@ void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) {
     }
 }
 
+void llm_graph_input_one::set_input(const llama_ubatch *) {
+    GGML_ASSERT(one && ggml_nelements(one) == 1);
+    float f_one = 1.0f;
+    ggml_backend_tensor_set(one, &f_one, 0, sizeof(float));
+}
+
 //
 // llm_graph_context
 //
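set_input() is invoked once backend buffers have been allocated, so writing the constant with ggml_backend_tensor_set here is how a graph input gets materialized. For readers unfamiliar with the pattern, a minimal sketch of how such an input is typically created at graph-build time; build_inp_one is a hypothetical helper named after the existing build_inp_* family, not something this commit adds:

// Hypothetical helper in the style of llm_graph_context::build_inp_*;
// illustrates the input lifecycle only, it is not part of this commit.
ggml_tensor * llm_graph_context::build_inp_one() {
    auto inp = std::make_unique<llm_graph_input_one>();

    // allocate a single-element F32 tensor and mark it as a graph input,
    // so the allocator assigns it a backend buffer before set_input() runs
    inp->one = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
    ggml_set_input(inp->one);

    ggml_tensor * cur = inp->one;
    res->add_input(std::move(inp));
    return cur;
}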
@@ -1299,9 +1305,12 @@ ggml_tensor * llm_graph_context::build_attn(
 
     const auto * mctx_cur = is_swa ? mctx_iswa->get_swa() : mctx_iswa->get_base();
 
-    // store to KV cache
-    {
+    // optionally store to KV cache
+    if (k_cur) {
         ggml_build_forward_expand(gf, mctx_cur->cpy_k(ctx0, k_cur, il));
+    }
+
+    if (v_cur) {
         ggml_build_forward_expand(gf, mctx_cur->cpy_v(ctx0, v_cur, il));
     }
 
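The build_attn change makes the two KV-cache writes independently optional: a null k_cur or v_cur now means "don't store", so a layer can attend over entries an earlier layer already placed in the cache, presumably what gemma3n's shared-KV layers rely on. A call-site sketch with a deliberately simplified, hypothetical signature (the real build_attn takes more arguments):

// Hypothetical, simplified call-site pattern; not the real signature.
// A layer that owns its KV stores fresh tensors; a layer that reuses
// KV stored by an earlier layer passes nullptr for both.
const bool reuse_kv = layer_shares_kv(il);   // hypothetical predicate
cur = build_attn(inp_attn, gf, wo, wo_b, q_cur,
                 reuse_kv ? nullptr : k_cur,
                 reuse_kv ? nullptr : v_cur,
                 il);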

src/llama-graph.h

Lines changed: 11 additions & 0 deletions
@@ -330,6 +330,17 @@ class llm_graph_input_mem_hybrid : public llm_graph_input_i {
     const llama_memory_hybrid_context * mctx;
 };
 
+// TODO: remove this when ggml_scale_add is implemented
+class llm_graph_input_one : public llm_graph_input_i {
+public:
+    llm_graph_input_one() {}
+    virtual ~llm_graph_input_one() = default;
+
+    void set_input(const llama_ubatch *) override;
+
+    ggml_tensor * one = nullptr; // F32
+};
+
 //
 // llm_graph_result
 //
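The TODO points at the class's purpose: ggml currently lacks a fused op that scales a tensor and then adds a constant, so the graph instead carries a literal 1.0f as a broadcastable input. A sketch of the idiom this enables, where x is a stand-in tensor and s a stand-in scale factor, neither taken from this commit:

// Emulate the missing ggml_scale_add(x, s, 1.0f), i.e. x*s + 1, with
// existing ops. `one` is the single-element F32 input owned by
// llm_graph_input_one; ggml_add broadcasts it across `x`.
ggml_tensor * scaled = ggml_scale(ctx0, x, s);        // x * s
ggml_tensor * result = ggml_add(ctx0, scaled, one);   // + 1.0f (broadcast)

Once a fused ggml_scale_add exists, the extra input tensor and this helper class can be removed, which is exactly what the TODO anticipates.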
