Commit 45d1964

Changed for binary update to ggml-org/llama.cpp@0013715
1 parent d8a9fbf commit 45d1964

5 files changed: +42 -3 lines changed

LLama/Native/LLamaModelParams.cs

Lines changed: 5 additions & 0 deletions

@@ -13,6 +13,11 @@ public unsafe struct LLamaModelParams
     /// todo: add support for llama_model_params.devices
     /// </summary>
     private IntPtr devices;
+
+    // NULL-terminated list of buffer types to use for tensors that match a pattern
+    // actual type: llama_model_tensor_buft_override*
+    // todo: add support for tensor_buft_overrides
+    private IntPtr tensor_buft_overrides;
 
     /// <summary>
     /// // number of layers to store in VRAM
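Note (not part of this commit): tensor_buft_overrides is only reserved as a raw IntPtr for now. A minimal sketch of what a managed mirror of the native entry type could look like, assuming the llama.cpp declaration behind the comment is { const char* pattern; ggml_backend_buffer_type_t buft; } and using an illustrative C# type name:

    // Hypothetical managed counterpart of llama_model_tensor_buft_override (not in this commit).
    // The native list this field points to is NULL-terminated, i.e. it is expected to end
    // with an entry whose pattern is null.
    [StructLayout(LayoutKind.Sequential)]
    internal unsafe struct LLamaModelTensorBuftOverride
    {
        // Pattern matched against tensor names, as a native UTF-8 string
        public byte* pattern;

        // Opaque ggml_backend_buffer_type_t handle selecting where matching tensors are allocated
        public IntPtr buft;
    }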

LLama/Native/LLamaModelQuantizeParams.cs

Lines changed: 5 additions & 0 deletions

@@ -89,6 +89,11 @@ public bool keep_split
     /// </summary>
     public IntPtr kv_overrides;
 
+    /// <summary>
+    /// pointer to vector containing tensor types
+    /// </summary>
+    public IntPtr tensor_types;
+
     /// <summary>
     /// Create a LLamaModelQuantizeParams with default values
     /// </summary>
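Note (not part of this commit): on the native side tensor_types defaults to null, which means no per-tensor type overrides during quantization. A hedged sketch, assuming the default-values factory documented in the trailing context of this hunk is exposed as Default():

    // Leaving the new pointer unset keeps the previous behaviour: every tensor is
    // quantized according to ftype, with no per-tensor overrides.
    var quantizeParams = LLamaModelQuantizeParams.Default();   // factory name assumed
    quantizeParams.tensor_types = IntPtr.Zero;
    quantizeParams.kv_overrides = IntPtr.Zero;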

LLama/Native/LLamaVocabPreType.cs

Lines changed: 4 additions & 0 deletions

@@ -38,5 +38,9 @@ internal enum LLamaVocabPreType
     MINERVA = 27,
     DEEPSEEK3_LLM = 28,
     GPT4O = 29,
+    SUPERBPE = 30,
+    TRILLION = 31,
+    BAILINGMOE = 32,
+    LLAMA4 = 33,
 }
 // ReSharper restore InconsistentNaming

LLama/Native/SafeLLamaContextHandle.cs

Lines changed: 9 additions & 0 deletions

@@ -389,6 +389,15 @@ static SafeLLamaContextHandle()
 
     [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
     private static extern LLamaKvCacheNative llama_get_kv_self(SafeLLamaContextHandle ctx);
+
+    /// <summary>
+    /// Set whether the model is in warmup mode or not
+    /// If true, all model tensors are activated during llama_decode() to load and cache their weights.
+    /// </summary>
+    /// <param name="ctx"></param>
+    /// <param name="warmup"></param>
+    [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+    private static extern void llama_set_warmup(SafeLLamaContextHandle ctx, [MarshalAs(UnmanagedType.U1)] bool warmup);
     #endregion
 
     #region LoRA
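Note (not part of this commit): llama_set_warmup is added here as a private P/Invoke only. A minimal sketch of a public wrapper it could back, with a hypothetical method name:

    /// <summary>
    /// Hypothetical wrapper over the new native call (name not part of this commit).
    /// </summary>
    public void SetWarmup(bool warmup)
    {
        // When warmup is true, llama_decode() activates every model tensor so the
        // weights are loaded and cached before real inference starts.
        llama_set_warmup(this, warmup);
    }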

LLama/Native/SafeLLamaSamplerHandle.cs

Lines changed: 19 additions & 3 deletions

@@ -408,20 +408,36 @@ public void AddFillInMiddleInfill(SafeLlamaModelHandle model)
     }
 
     /// <summary>
-    /// Create a sampler which makes tokens impossible unless they match the grammar
+    /// Create a sampler which makes tokens impossible unless they match the grammar.
     /// </summary>
-    /// <param name="model"></param>
+    /// <param name="model">The model that this grammar will be used with</param>
     /// <param name="grammar"></param>
     /// <param name="root">Root rule of the grammar</param>
     /// <returns></returns>
     public void AddGrammar(SafeLlamaModelHandle model, string grammar, string root)
+    {
+        AddGrammar(model.Vocab, grammar, root);
+    }
+
+    /// <summary>
+    /// Create a sampler which makes tokens impossible unless they match the grammar.
+    /// </summary>
+    /// <param name="vocab">The vocabulary that this grammar will be used with</param>
+    /// <param name="grammar"></param>
+    /// <param name="root">Root rule of the grammar</param>
+    /// <returns></returns>
+    public void AddGrammar(SafeLlamaModelHandle.Vocabulary vocab, string grammar, string root)
     {
         unsafe
         {
-            llama_sampler_chain_add(this, llama_sampler_init_grammar(model.Vocab.VocabNative, grammar, root));
+            llama_sampler_chain_add(this, llama_sampler_init_grammar(vocab.VocabNative, grammar, root));
         }
 
         // ReSharper disable InconsistentNaming
+        // @details Initializes a GBNF grammar, see grammars/README.md for details.
+        // @param vocab The vocabulary that this grammar will be used with.
+        // @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails.
+        // @param grammar_root The name of the start symbol for the grammar.
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
         static extern unsafe IntPtr llama_sampler_init_grammar(LLamaVocabNative* model, string grammar_str, string grammar_root);
         // ReSharper restore InconsistentNaming
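Note (not part of this commit): illustrative usage of the new vocabulary-based overload, assuming an existing sampler chain handle (sampler) and a loaded SafeLlamaModelHandle (model):

    // Constrain sampling with a minimal GBNF grammar; "root" is the start symbol.
    var grammar = "root ::= \"yes\" | \"no\"";
    sampler.AddGrammar(model.Vocab, grammar, "root");

    // The model-based overload still works; it now simply forwards to the
    // vocabulary-based one.
    sampler.AddGrammar(model, grammar, "root");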
