Commit b1dcf96

Merge pull request #766 from caer/765-include-vendored-libraries
#765 (Fixed): Include `llama.cpp` Vendored Libraries in Builds
2 parents 6f50477 + aa73917

6 files changed: +35 -40 lines

.github/workflows/llama-cpp-rs-check.yml

Lines changed: 5 additions & 1 deletion
@@ -32,9 +32,13 @@ jobs:
       - name: Clippy
         run: cargo clippy
       - name: Fmt
-        run: cargo fmt
+        run: cargo fmt --check
       - name: Test
         run: cargo test --features sampler
+      - name: Dry-Run Publishing llama-cpp-sys-2 Crate
+        run: RUST_BACKTRACE=1 cargo publish --package llama-cpp-sys-2 --verbose --dry-run
+      - name: Dry-Run Publishing llama-cpp-2 Crate
+        run: RUST_BACKTRACE=1 cargo publish --package llama-cpp-2 --verbose --dry-run
   arm64:
     name: Check that it builds on various targets
     runs-on: ubuntu-latest

llama-cpp-2/src/model.rs

Lines changed: 5 additions & 4 deletions
@@ -13,9 +13,9 @@ use crate::model::params::LlamaModelParams;
 use crate::token::LlamaToken;
 use crate::token_type::{LlamaTokenAttr, LlamaTokenAttrs};
 use crate::{
-    ApplyChatTemplateError, ChatTemplateError, LlamaContextLoadError,
-    LlamaLoraAdapterInitError, LlamaModelLoadError, MetaValError, NewLlamaChatMessageError,
-    StringToTokenError, TokenToStringError,
+    ApplyChatTemplateError, ChatTemplateError, LlamaContextLoadError, LlamaLoraAdapterInitError,
+    LlamaModelLoadError, MetaValError, NewLlamaChatMessageError, StringToTokenError,
+    TokenToStringError,
 };
 
 pub mod params;
@@ -488,7 +488,8 @@ impl LlamaModel {
     pub fn n_head_kv(&self) -> u32 {
         // It's never possible for this to panic because while the API interface is defined as an int32_t,
         // the field it's accessing is a uint32_t.
-        u32::try_from(unsafe { llama_cpp_sys_2::llama_model_n_head_kv(self.model.as_ptr()) }).unwrap()
+        u32::try_from(unsafe { llama_cpp_sys_2::llama_model_n_head_kv(self.model.as_ptr()) })
+            .unwrap()
     }
 
     /// Get metadata value as a string by key name
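
The comment above is the whole safety argument for the `unwrap`. A standalone sketch of the same conversion pattern, with a hypothetical helper name (nothing here calls into llama.cpp):

    // Hypothetical helper mirroring the conversion above: the C API returns
    // int32_t, but the backing field is a uint32_t, so the value seen here is
    // never negative in practice and `try_from` does not fail.
    fn head_count_from_ffi(raw: i32) -> u32 {
        u32::try_from(raw).expect("non-negative per the C API's contract")
    }

    fn main() {
        assert_eq!(head_count_from_ffi(8), 8);
    }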

llama-cpp-2/src/sampling.rs

Lines changed: 14 additions & 22 deletions
@@ -63,7 +63,7 @@ impl LlamaSampler {
     }
 
     /// Resets the internal state of the sampler.
-    ///
+    ///
     /// This can be useful when you want to start fresh with a sampler without creating a new instance.
     pub fn reset(&mut self) {
         unsafe {
@@ -72,15 +72,15 @@ impl LlamaSampler {
     }
 
     /// Gets the random seed used by this sampler.
-    ///
+    ///
     /// Returns:
     /// - For random samplers (dist, mirostat, mirostat_v2): returns their current seed
    /// - For sampler chains: returns the first non-default seed found in reverse order
     /// - For all other samplers: returns 0xFFFFFFFF
     #[must_use]
     pub fn get_seed(&self) -> u32 {
         unsafe { llama_cpp_sys_2::llama_sampler_get_seed(self.sampler) }
-    }
+    }
 
     /// Combines a list of samplers into a single sampler that applies each component sampler one
     /// after another.
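
A minimal sketch of the documented seed behavior, assuming the crate's `dist` and `greedy` constructors (wrappers over llama.cpp's `llama_sampler_init_dist` and `llama_sampler_init_greedy`):

    use llama_cpp_2::sampling::LlamaSampler;

    fn main() {
        // A seeded random sampler reports the seed it was constructed with.
        let dist = LlamaSampler::dist(1234);
        assert_eq!(dist.get_seed(), 1234);

        // A deterministic sampler has no seed, so the sentinel is returned.
        let greedy = LlamaSampler::greedy();
        assert_eq!(greedy.get_seed(), 0xFFFF_FFFF);
    }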
@@ -213,11 +213,11 @@ impl LlamaSampler {
         Self { sampler }
     }
 
-    /// Top-nσ sampling as described in academic paper "Top-nσ: Not All Logits Are You Need"
+    /// Top-nσ sampling as described in academic paper "Top-nσ: Not All Logits Are You Need"
     /// <https://arxiv.org/pdf/2411.07641>
     ///
     /// This method filters logits by selecting only those within *n* standard deviations of the mean.
-    ///
+    ///
     /// # Parameters
     /// - `n`: Number of standard deviations from the mean to include in sampling
     ///
@@ -232,7 +232,7 @@ impl LlamaSampler {
     ///
     /// let mut data_array = LlamaTokenDataArray::new(vec![
     ///     LlamaTokenData::new(LlamaToken(0), 0.0, 0.0),
-    ///     LlamaTokenData::new(LlamaToken(1), 1.0, 0.0),
+    ///     LlamaTokenData::new(LlamaToken(1), 1.0, 0.0),
     ///     LlamaTokenData::new(LlamaToken(2), 2.0, 0.0),
     /// ], false);
     ///
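
Stated as a formula, with \(\mu\) and \(\sigma\) the mean and standard deviation of the logits \(\ell_j\) (a restatement, not part of the diff):

    |\ell_i - \mu| \le n\,\sigma \quad \text{(doc's phrasing)}; \qquad \ell_i \ge \ell_{\max} - n\,\sigma \quad \text{(the cited paper thresholds against the maximum logit)}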
@@ -314,17 +314,15 @@ impl LlamaSampler {
     ) -> Option<Self> {
         let grammar_str = CString::new(grammar_str).unwrap();
         let grammar_root = CString::new(grammar_root).unwrap();
-
+
         let trigger_word_cstrings: Vec<CString> = trigger_words
             .into_iter()
             .map(|word| CString::new(word.as_ref()).unwrap())
             .collect();
-
-        let mut trigger_word_ptrs: Vec<*const c_char> = trigger_word_cstrings
-            .iter()
-            .map(|cs| cs.as_ptr())
-            .collect();
-
+
+        let mut trigger_word_ptrs: Vec<*const c_char> =
+            trigger_word_cstrings.iter().map(|cs| cs.as_ptr()).collect();
+
         let sampler = unsafe {
             llama_cpp_sys_2::llama_sampler_init_grammar_lazy(
                 model.vocab_ptr(),
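
The rewrapped lines above are the standard two-step `CString`-to-pointer pattern. A self-contained sketch (no llama.cpp involved) of why the owning `Vec<CString>` must outlive the raw pointers:

    use std::ffi::CString;
    use std::os::raw::c_char;

    fn main() {
        let trigger_words = ["<tool_call>", "<function="];

        // Step 1: make owned, NUL-terminated copies. CString::new only fails
        // if the input contains an interior NUL byte.
        let cstrings: Vec<CString> = trigger_words
            .iter()
            .map(|w| CString::new(*w).expect("no interior NUL bytes"))
            .collect();

        // Step 2: borrow raw pointers into those copies. Dropping `cstrings`
        // while `ptrs` is still in use would leave the pointers dangling.
        let ptrs: Vec<*const c_char> = cstrings.iter().map(|cs| cs.as_ptr()).collect();

        // `ptrs.as_ptr()` and `ptrs.len()` are what a C entry point such as
        // llama_sampler_init_grammar_lazy ultimately receives.
        assert_eq!(ptrs.len(), trigger_words.len());
    }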
@@ -504,20 +502,14 @@ impl LlamaSampler {
     /// ```
     #[must_use]
     pub fn logit_bias(n_vocab: i32, biases: &[LlamaLogitBias]) -> Self {
-
         let data = biases.as_ptr().cast::<llama_cpp_sys_2::llama_logit_bias>();
-
+
         let sampler = unsafe {
-            llama_cpp_sys_2::llama_sampler_init_logit_bias(
-                n_vocab,
-                biases.len() as i32,
-                data,
-            )
+            llama_cpp_sys_2::llama_sampler_init_logit_bias(n_vocab, biases.len() as i32, data)
         };
-
+
         Self { sampler }
     }
-
 }
 
 impl Drop for LlamaSampler {
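
A minimal usage sketch for the reformatted constructor; the vocabulary size of 32_000 is a placeholder that would normally come from the loaded model:

    use llama_cpp_2::sampling::LlamaSampler;
    use llama_cpp_2::token::{logit_bias::LlamaLogitBias, LlamaToken};

    fn main() {
        let biases = [
            LlamaLogitBias::new(LlamaToken(42), 5.0),   // nudge token 42 upward
            LlamaLogitBias::new(LlamaToken(7), -100.0), // effectively ban token 7
        ];
        let _sampler = LlamaSampler::logit_bias(32_000, &biases);
    }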

llama-cpp-2/src/token/logit_bias.rs

Lines changed: 7 additions & 10 deletions
@@ -17,7 +17,7 @@ pub struct LlamaLogitBias {
 
 impl LlamaLogitBias {
     /// Creates a new logit bias for a specific token with the given bias value.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -27,15 +27,12 @@ impl LlamaLogitBias {
     #[must_use]
     pub fn new(LlamaToken(token): LlamaToken, bias: f32) -> Self {
         Self {
-            logit_bias: llama_cpp_sys_2::llama_logit_bias {
-                token,
-                bias,
-            },
+            logit_bias: llama_cpp_sys_2::llama_logit_bias { token, bias },
         }
     }
 
     /// Gets the token this bias applies to.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -49,7 +46,7 @@ impl LlamaLogitBias {
     }
 
     /// Gets the bias value.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -63,7 +60,7 @@ impl LlamaLogitBias {
     }
 
     /// Sets the token this bias applies to.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -78,7 +75,7 @@ impl LlamaLogitBias {
     }
 
     /// Sets the bias value.
-    ///
+    ///
     /// # Examples
     /// ```
     /// # use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
@@ -90,4 +87,4 @@ impl LlamaLogitBias {
     pub fn set_bias(&mut self, bias: f32) {
         self.logit_bias.bias = bias;
     }
-}
+}
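
Taken together, the accessors in this file pair up as below; the getter names `token()` and `bias()` are assumed from the doc headings, since the hunks only show their doc comments and `set_bias`:

    use llama_cpp_2::token::{logit_bias::LlamaLogitBias, LlamaToken};

    fn main() {
        let mut lb = LlamaLogitBias::new(LlamaToken(1), 1.5);
        assert_eq!(lb.token(), LlamaToken(1)); // getter name assumed
        assert_eq!(lb.bias(), 1.5);            // getter name assumed

        lb.set_bias(-2.0);
        assert_eq!(lb.bias(), -2.0);
    }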

llama-cpp-sys-2/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -42,6 +42,7 @@ include = [
     "/llama.cpp/ggml/src/llamafile/sgemm.cpp",
 
     "/llama.cpp/pocs",
+    "/llama.cpp/vendor",
 
     "/llama.cpp/CMakeLists.txt",
     "/llama.cpp/common/CMakeLists.txt",

llama-cpp-sys-2/build.rs

Lines changed: 3 additions & 3 deletions
@@ -412,9 +412,9 @@ fn main() {
     // Static linking is problematic because NVIDIA does not provide culibos.lib,
     // and static CUDA libraries (like cublas_static.lib) are usually not shipped.
 
-    println!("cargo:rustc-link-lib=cudart"); // Links to cudart64_*.dll
-    println!("cargo:rustc-link-lib=cublas"); // Links to cublas64_*.dll
-    println!("cargo:rustc-link-lib=cublasLt"); // Links to cublasLt64_*.dll
+    println!("cargo:rustc-link-lib=cudart"); // Links to cudart64_*.dll
+    println!("cargo:rustc-link-lib=cublas"); // Links to cublas64_*.dll
+    println!("cargo:rustc-link-lib=cublasLt"); // Links to cublasLt64_*.dll
 
     // Link to CUDA driver API (nvcuda.dll via cuda.lib)
     if !cfg!(feature = "cuda-no-vmm") {
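
For context, the realigned lines are ordinary Cargo build-script directives. A stripped-down sketch of the same dynamic-linking logic (illustrative only; the real build.rs detects CUDA first, and the driver-API library name is assumed):

    // build.rs sketch: each directive asks rustc to link the named CUDA
    // library dynamically (resolved against cudart64_*.dll etc. on Windows).
    fn main() {
        for lib in ["cudart", "cublas", "cublasLt"] {
            println!("cargo:rustc-link-lib={lib}");
        }
        // Driver API (nvcuda.dll via cuda.lib), skipped under `cuda-no-vmm`.
        if !cfg!(feature = "cuda-no-vmm") {
            println!("cargo:rustc-link-lib=cuda");
        }
    }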
