Commit 45d1964

Changed for binary update to ggml-org/llama.cpp@0013715
1 parent d8a9fbf commit 45d1964

5 files changed: +42 -3 lines changed

LLama/Native/LLamaModelParams.cs

Lines changed: 5 additions & 0 deletions

@@ -13,6 +13,11 @@ public unsafe struct LLamaModelParams
     /// todo: add support for llama_model_params.devices
     /// </summary>
     private IntPtr devices;
+
+    // NULL-terminated list of buffer types to use for tensors that match a pattern
+    // actual type: llama_model_tensor_buft_override*
+    // todo: add support for tensor_buft_overrides
+    private IntPtr tensor_buft_overrides;
 
     /// <summary>
     /// // number of layers to store in VRAM
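Note (not part of this commit): tensor_buft_overrides is only reserved as a raw IntPtr for now. A minimal sketch of what a managed mirror of the native entry type could look like, assuming the llama.cpp declaration behind the comment is { const char* pattern; ggml_backend_buffer_type_t buft; } and using an illustrative C# type name:

    // Hypothetical managed counterpart of llama_model_tensor_buft_override (not in this commit).
    // The native list this field points to is NULL-terminated, i.e. it is expected to end
    // with an entry whose pattern is null.
    [StructLayout(LayoutKind.Sequential)]
    internal unsafe struct LLamaModelTensorBuftOverride
    {
        // Pattern matched against tensor names, as a native UTF-8 string
        public byte* pattern;

        // Opaque ggml_backend_buffer_type_t handle selecting where matching tensors are allocated
        public IntPtr buft;
    }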

LLama/Native/LLamaModelQuantizeParams.cs

Lines changed: 5 additions & 0 deletions

@@ -89,6 +89,11 @@ public bool keep_split
     /// </summary>
     public IntPtr kv_overrides;
 
+    /// <summary>
+    /// pointer to vector containing tensor types
+    /// </summary>
+    public IntPtr tensor_types;
+
     /// <summary>
     /// Create a LLamaModelQuantizeParams with default values
     /// </summary>
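Note (not part of this commit): on the native side tensor_types defaults to null, which means no per-tensor type overrides during quantization. A hedged sketch, assuming the default-values factory documented in the trailing context of this hunk is exposed as Default():

    // Leaving the new pointer unset keeps the previous behaviour: every tensor is
    // quantized according to ftype, with no per-tensor overrides.
    var quantizeParams = LLamaModelQuantizeParams.Default();   // factory name assumed
    quantizeParams.tensor_types = IntPtr.Zero;
    quantizeParams.kv_overrides = IntPtr.Zero;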

LLama/Native/LLamaVocabPreType.cs

Lines changed: 4 additions & 0 deletions

@@ -38,5 +38,9 @@ internal enum LLamaVocabPreType
     MINERVA = 27,
     DEEPSEEK3_LLM = 28,
     GPT4O = 29,
+    SUPERBPE = 30,
+    TRILLION = 31,
+    BAILINGMOE = 32,
+    LLAMA4 = 33,
 }
 // ReSharper restore InconsistentNaming

LLama/Native/SafeLLamaContextHandle.cs

Lines changed: 9 additions & 0 deletions

@@ -389,6 +389,15 @@ static SafeLLamaContextHandle()
 
     [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
     private static extern LLamaKvCacheNative llama_get_kv_self(SafeLLamaContextHandle ctx);
+
+    /// <summary>
+    /// Set whether the model is in warmup mode or not
+    /// If true, all model tensors are activated during llama_decode() to load and cache their weights.
+    /// </summary>
+    /// <param name="ctx"></param>
+    /// <param name="warmup"></param>
+    [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+    private static extern void llama_set_warmup(SafeLLamaContextHandle ctx, [MarshalAs(UnmanagedType.U1)] bool warmup);
     #endregion
 
     #region LoRA
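Note (not part of this commit): llama_set_warmup is added here as a private P/Invoke only. A minimal sketch of a public wrapper it could back, with a hypothetical method name:

    /// <summary>
    /// Hypothetical wrapper over the new native call (name not part of this commit).
    /// </summary>
    public void SetWarmup(bool warmup)
    {
        // When warmup is true, llama_decode() activates every model tensor so the
        // weights are loaded and cached before real inference starts.
        llama_set_warmup(this, warmup);
    }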

LLama/Native/SafeLLamaSamplerHandle.cs

Lines changed: 19 additions & 3 deletions

@@ -408,20 +408,36 @@ public void AddFillInMiddleInfill(SafeLlamaModelHandle model)
     }
 
     /// <summary>
-    /// Create a sampler which makes tokens impossible unless they match the grammar
+    /// Create a sampler which makes tokens impossible unless they match the grammar.
     /// </summary>
-    /// <param name="model"></param>
+    /// <param name="model">The model that this grammar will be used with</param>
     /// <param name="grammar"></param>
     /// <param name="root">Root rule of the grammar</param>
     /// <returns></returns>
     public void AddGrammar(SafeLlamaModelHandle model, string grammar, string root)
+    {
+        AddGrammar(model.Vocab, grammar, root);
+    }
+
+    /// <summary>
+    /// Create a sampler which makes tokens impossible unless they match the grammar.
+    /// </summary>
+    /// <param name="vocab">The vocabulary that this grammar will be used with</param>
+    /// <param name="grammar"></param>
+    /// <param name="root">Root rule of the grammar</param>
+    /// <returns></returns>
+    public void AddGrammar(SafeLlamaModelHandle.Vocabulary vocab, string grammar, string root)
     {
         unsafe
         {
-            llama_sampler_chain_add(this, llama_sampler_init_grammar(model.Vocab.VocabNative, grammar, root));
+            llama_sampler_chain_add(this, llama_sampler_init_grammar(vocab.VocabNative, grammar, root));
         }
 
         // ReSharper disable InconsistentNaming
+        // @details Initializes a GBNF grammar, see grammars/README.md for details.
+        // @param vocab The vocabulary that this grammar will be used with.
+        // @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails.
+        // @param grammar_root The name of the start symbol for the grammar.
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
         static extern unsafe IntPtr llama_sampler_init_grammar(LLamaVocabNative* model, string grammar_str, string grammar_root);
         // ReSharper restore InconsistentNaming
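Note (not part of this commit): illustrative usage of the new vocabulary-based overload, assuming an existing sampler chain handle (sampler) and a loaded SafeLlamaModelHandle (model):

    // Constrain sampling with a minimal GBNF grammar; "root" is the start symbol.
    var grammar = "root ::= \"yes\" | \"no\"";
    sampler.AddGrammar(model.Vocab, grammar, "root");

    // The model-based overload still works; it now simply forwards to the
    // vocabulary-based one.
    sampler.AddGrammar(model, grammar, "root");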
