Commit b1dcf96

Merge pull request #766 from caer/765-include-vendored-libraries
#765 (Fixed): Include `llama.cpp` Vendored Libraries in Builds
2 parents 6f50477 + aa73917

6 files changed: +35 -40 lines

.github/workflows/llama-cpp-rs-check.yml

Lines changed: 5 additions & 1 deletion
@@ -32,9 +32,13 @@ jobs:
       - name: Clippy
         run: cargo clippy
       - name: Fmt
-        run: cargo fmt
+        run: cargo fmt --check
       - name: Test
         run: cargo test --features sampler
+      - name: Dry-Run Publishing llama-cpp-sys-2 Crate
+        run: RUST_BACKTRACE=1 cargo publish --package llama-cpp-sys-2 --verbose --dry-run
+      - name: Dry-Run Publishing llama-cpp-2 Crate
+        run: RUST_BACKTRACE=1 cargo publish --package llama-cpp-2 --verbose --dry-run
   arm64:
     name: Check that it builds on various targets
     runs-on: ubuntu-latest

llama-cpp-2/src/model.rs

Lines changed: 5 additions & 4 deletions
@@ -13,9 +13,9 @@ use crate::model::params::LlamaModelParams;
 use crate::token::LlamaToken;
 use crate::token_type::{LlamaTokenAttr, LlamaTokenAttrs};
 use crate::{
-    ApplyChatTemplateError, ChatTemplateError, LlamaContextLoadError,
-    LlamaLoraAdapterInitError, LlamaModelLoadError, MetaValError, NewLlamaChatMessageError,
-    StringToTokenError, TokenToStringError,
+    ApplyChatTemplateError, ChatTemplateError, LlamaContextLoadError, LlamaLoraAdapterInitError,
+    LlamaModelLoadError, MetaValError, NewLlamaChatMessageError, StringToTokenError,
+    TokenToStringError,
 };
 
 pub mod params;
@@ -488,7 +488,8 @@ impl LlamaModel {
     pub fn n_head_kv(&self) -> u32 {
         // It's never possible for this to panic because while the API interface is defined as an int32_t,
         // the field it's accessing is a uint32_t.
-        u32::try_from(unsafe { llama_cpp_sys_2::llama_model_n_head_kv(self.model.as_ptr()) }).unwrap()
+        u32::try_from(unsafe { llama_cpp_sys_2::llama_model_n_head_kv(self.model.as_ptr()) })
+            .unwrap()
     }
 
     /// Get metadata value as a string by key name
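
The comment above is the whole safety argument for the `unwrap`. A standalone sketch of the same conversion pattern, with a hypothetical helper name (nothing here calls into llama.cpp):

    // Hypothetical helper mirroring the conversion above: the C API returns
    // int32_t, but the backing field is a uint32_t, so the value seen here is
    // never negative in practice and `try_from` does not fail.
    fn head_count_from_ffi(raw: i32) -> u32 {
        u32::try_from(raw).expect("non-negative per the C API's contract")
    }

    fn main() {
        assert_eq!(head_count_from_ffi(8), 8);
    }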

llama-cpp-2/src/sampling.rs

Lines changed: 14 additions & 22 deletions
@@ -63,7 +63,7 @@ impl LlamaSampler {
     }
 
     /// Resets the internal state of the sampler.
-    ///
+    ///
     /// This can be useful when you want to start fresh with a sampler without creating a new instance.
     pub fn reset(&mut self) {
         unsafe {
@@ -72,15 +72,15 @@ impl LlamaSampler {
     }
 
     /// Gets the random seed used by this sampler.
-    ///
+    ///
     /// Returns:
     /// - For random samplers (dist, mirostat, mirostat_v2): returns their current seed
    /// - For sampler chains: returns the first non-default seed found in reverse order
     /// - For all other samplers: returns 0xFFFFFFFF
     #[must_use]
     pub fn get_seed(&self) -> u32 {
         unsafe { llama_cpp_sys_2::llama_sampler_get_seed(self.sampler) }
-    }
+    }
 
     /// Combines a list of samplers into a single sampler that applies each component sampler one
     /// after another.
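
A minimal sketch of the documented seed behavior, assuming the crate's `dist` and `greedy` constructors (wrappers over llama.cpp's `llama_sampler_init_dist` and `llama_sampler_init_greedy`):

    use llama_cpp_2::sampling::LlamaSampler;

    fn main() {
        // A seeded random sampler reports the seed it was constructed with.
        let dist = LlamaSampler::dist(1234);
        assert_eq!(dist.get_seed(), 1234);

        // A deterministic sampler has no seed, so the sentinel is returned.
        let greedy = LlamaSampler::greedy();
        assert_eq!(greedy.get_seed(), 0xFFFF_FFFF);
    }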
@@ -213,11 +213,11 @@ impl LlamaSampler {
         Self { sampler }
     }
 
-    /// Top-nσ sampling as described in academic paper "Top-nσ: Not All Logits Are You Need"
+    /// Top-nσ sampling as described in academic paper "Top-nσ: Not All Logits Are You Need"
     /// <https://arxiv.org/pdf/2411.07641>
     ///
     /// This method filters logits by selecting only those within *n* standard deviations of the mean.
-    ///
+    ///
     /// # Parameters
     /// - `n`: Number of standard deviations from the mean to include in sampling
     ///
@@ -232,7 +232,7 @@ impl LlamaSampler {
     ///
     /// let mut data_array = LlamaTokenDataArray::new(vec![
     ///     LlamaTokenData::new(LlamaToken(0), 0.0, 0.0),
-    ///     LlamaTokenData::new(LlamaToken(1), 1.0, 0.0),
+    ///     LlamaTokenData::new(LlamaToken(1), 1.0, 0.0),
     ///     LlamaTokenData::new(LlamaToken(2), 2.0, 0.0),
     /// ], false);
     ///
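
Stated as a formula, with \(\mu\) and \(\sigma\) the mean and standard deviation of the logits \(\ell_j\) (a restatement, not part of the diff):

    |\ell_i - \mu| \le n\,\sigma \quad \text{(doc's phrasing)}; \qquad \ell_i \ge \ell_{\max} - n\,\sigma \quad \text{(the cited paper thresholds against the maximum logit)}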
@@ -314,17 +314,15 @@ impl LlamaSampler {
     ) -> Option<Self> {
         let grammar_str = CString::new(grammar_str).unwrap();
         let grammar_root = CString::new(grammar_root).unwrap();
-
+
         let trigger_word_cstrings: Vec<CString> = trigger_words
             .into_iter()
             .map(|word| CString::new(word.as_ref()).unwrap())
             .collect();
-
-        let mut trigger_word_ptrs: Vec<*const c_char> = trigger_word_cstrings
-            .iter()
-            .map(|cs| cs.as_ptr())
-            .collect();
-
+
+        let mut trigger_word_ptrs: Vec<*const c_char> =
+            trigger_word_cstrings.iter().map(|cs| cs.as_ptr()).collect();
+
         let sampler = unsafe {
             llama_cpp_sys_2::llama_sampler_init_grammar_lazy(
                 model.vocab_ptr(),
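
The rewrapped lines above are the standard two-step `CString`-to-pointer pattern. A self-contained sketch (no llama.cpp involved) of why the owning `Vec<CString>` must outlive the raw pointers:

    use std::ffi::CString;
    use std::os::raw::c_char;

    fn main() {
        let trigger_words = ["<tool_call>", "<function="];

        // Step 1: make owned, NUL-terminated copies. CString::new only fails
        // if the input contains an interior NUL byte.
        let cstrings: Vec<CString> = trigger_words
            .iter()
            .map(|w| CString::new(*w).expect("no interior NUL bytes"))
            .collect();

        // Step 2: borrow raw pointers into those copies. Dropping `cstrings`
        // while `ptrs` is still in use would leave the pointers dangling.
        let ptrs: Vec<*const c_char> = cstrings.iter().map(|cs| cs.as_ptr()).collect();

        // `ptrs.as_ptr()` and `ptrs.len()` are what a C entry point such as
        // llama_sampler_init_grammar_lazy ultimately receives.
        assert_eq!(ptrs.len(), trigger_words.len());
    }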
@@ -504,20 +502,14 @@ impl LlamaSampler {
     /// ```
     #[must_use]
     pub fn logit_bias(n_vocab: i32, biases: &[LlamaLogitBias]) -> Self {
-
         let data = biases.as_ptr().cast::<llama_cpp_sys_2::llama_logit_bias>();
-
+
         let sampler = unsafe {
-            llama_cpp_sys_2::llama_sampler_init_logit_bias(
-                n_vocab,
-                biases.len() as i32,
-                data,
-            )
+            llama_cpp_sys_2::llama_sampler_init_logit_bias(n_vocab, biases.len() as i32, data)
         };
-
+
         Self { sampler }
     }
-
 }
 
 impl Drop for LlamaSampler {
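
A minimal usage sketch for the reformatted constructor; the vocabulary size of 32_000 is a placeholder that would normally come from the loaded model:

    use llama_cpp_2::sampling::LlamaSampler;
    use llama_cpp_2::token::{logit_bias::LlamaLogitBias, LlamaToken};

    fn main() {
        let biases = [
            LlamaLogitBias::new(LlamaToken(42), 5.0),   // nudge token 42 upward
            LlamaLogitBias::new(LlamaToken(7), -100.0), // effectively ban token 7
        ];
        let _sampler = LlamaSampler::logit_bias(32_000, &biases);
    }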

llama-cpp-2/src/token/logit_bias.rs

Lines changed: 7 additions & 10 deletions
@@ -17,7 +17,7 @@ pub struct LlamaLogitBias {
 
 impl LlamaLogitBias {
     /// Creates a new logit bias for a specific token with the given bias value.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -27,15 +27,12 @@ impl LlamaLogitBias {
     #[must_use]
     pub fn new(LlamaToken(token): LlamaToken, bias: f32) -> Self {
         Self {
-            logit_bias: llama_cpp_sys_2::llama_logit_bias {
-                token,
-                bias,
-            },
+            logit_bias: llama_cpp_sys_2::llama_logit_bias { token, bias },
         }
     }
 
     /// Gets the token this bias applies to.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -49,7 +46,7 @@ impl LlamaLogitBias {
     }
 
     /// Gets the bias value.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -63,7 +60,7 @@ impl LlamaLogitBias {
     }
 
     /// Sets the token this bias applies to.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -78,7 +75,7 @@ impl LlamaLogitBias {
     }
 
     /// Sets the bias value.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -90,4 +87,4 @@ impl LlamaLogitBias {
     pub fn set_bias(&mut self, bias: f32) {
         self.logit_bias.bias = bias;
     }
-}
+}
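
Taken together, the accessors in this file pair up as below; the getter names `token()` and `bias()` are assumed from the doc headings, since the hunks only show their doc comments and `set_bias`:

    use llama_cpp_2::token::{logit_bias::LlamaLogitBias, LlamaToken};

    fn main() {
        let mut lb = LlamaLogitBias::new(LlamaToken(1), 1.5);
        assert_eq!(lb.token(), LlamaToken(1)); // getter name assumed
        assert_eq!(lb.bias(), 1.5);            // getter name assumed

        lb.set_bias(-2.0);
        assert_eq!(lb.bias(), -2.0);
    }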

llama-cpp-sys-2/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -42,6 +42,7 @@ include = [
     "/llama.cpp/ggml/src/llamafile/sgemm.cpp",
 
     "/llama.cpp/pocs",
+    "/llama.cpp/vendor",
 
     "/llama.cpp/CMakeLists.txt",
     "/llama.cpp/common/CMakeLists.txt",

llama-cpp-sys-2/build.rs

Lines changed: 3 additions & 3 deletions
@@ -412,9 +412,9 @@ fn main() {
     // Static linking is problematic because NVIDIA does not provide culibos.lib,
     // and static CUDA libraries (like cublas_static.lib) are usually not shipped.
 
-    println!("cargo:rustc-link-lib=cudart"); // Links to cudart64_*.dll
-    println!("cargo:rustc-link-lib=cublas"); // Links to cublas64_*.dll
-    println!("cargo:rustc-link-lib=cublasLt"); // Links to cublasLt64_*.dll
+    println!("cargo:rustc-link-lib=cudart"); // Links to cudart64_*.dll
+    println!("cargo:rustc-link-lib=cublas"); // Links to cublas64_*.dll
+    println!("cargo:rustc-link-lib=cublasLt"); // Links to cublasLt64_*.dll
 
     // Link to CUDA driver API (nvcuda.dll via cuda.lib)
     if !cfg!(feature = "cuda-no-vmm") {
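
For context, the realigned lines are ordinary Cargo build-script directives. A stripped-down sketch of the same dynamic-linking logic (illustrative only; the real build.rs detects CUDA first, and the driver-API library name is assumed):

    // build.rs sketch: each directive asks rustc to link the named CUDA
    // library dynamically (resolved against cudart64_*.dll etc. on Windows).
    fn main() {
        for lib in ["cudart", "cublas", "cublasLt"] {
            println!("cargo:rustc-link-lib={lib}");
        }
        // Driver API (nvcuda.dll via cuda.lib), skipped under `cuda-no-vmm`.
        if !cfg!(feature = "cuda-no-vmm") {
            println!("cargo:rustc-link-lib=cuda");
        }
    }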
