Commit 301c4e2

Merge pull request #488 from thewh1teagle/thewh1teagle/patch-1
Thewh1teagle/patch 1
2 parents 951afc3 + 5da6a85

10 files changed (+58, -68 lines)

Cargo.lock

Lines changed: 1 addition & 8 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [workspace]
 resolver = "2"
-members = ["llama-cpp-sys-2", "llama-cpp-2", "embeddings", "examples/usage", "examples/simple"]
+members = ["llama-cpp-sys-2", "llama-cpp-2", "embeddings", "examples/simple"]
 
 [workspace.dependencies]
 # core library deps

examples/usage/src/main.rs renamed to examples/usage.rs

Lines changed: 10 additions & 10 deletions
@@ -1,29 +1,30 @@
 //! # Usage
-//! 
+//!
 //! This is just about the smallest possible way to do inference. To fetch a model from hugging face:
-//! 
-//! ```bash
+//!
+//! ```console
 //! git clone --recursive https://github.com/utilityai/llama-cpp-rs
 //! cd llama-cpp-rs/examples/usage
 //! wget https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GGUF/resolve/main/qwen2-1_5b-instruct-q4_0.gguf
-//! cargo run --bin usage -- qwen2-1_5b-instruct-q4_0.gguf
+//! cargo run --example usage -- qwen2-1_5b-instruct-q4_0.gguf
 //! ```
-use std::io::Write;
 use llama_cpp_2::context::params::LlamaContextParams;
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
 use llama_cpp_2::model::params::LlamaModelParams;
 use llama_cpp_2::model::LlamaModel;
 use llama_cpp_2::model::{AddBos, Special};
 use llama_cpp_2::token::data_array::LlamaTokenDataArray;
+use std::io::Write;
 
 #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
 fn main() {
     let model_path = std::env::args().nth(1).expect("Please specify model path");
     let backend = LlamaBackend::init().unwrap();
     let params = LlamaModelParams::default();
 
-    let prompt = "<|im_start|>user\nHello! how are you?<|im_end|>\n<|im_start|>assistant\n".to_string();
+    let prompt =
+        "<|im_start|>user\nHello! how are you?<|im_end|>\n<|im_start|>assistant\n".to_string();
     LlamaContextParams::default();
     let model =
         LlamaModel::load_from_file(&backend, model_path, &params).expect("unable to load model");
@@ -48,14 +49,11 @@ fn main() {
     }
     ctx.decode(&mut batch).expect("llama_decode() failed");
 
-
     let mut n_cur = batch.n_tokens();
 
-
     // The `Decoder`
     let mut decoder = encoding_rs::UTF_8.new_decoder();
 
-
     while n_cur <= n_len {
         // sample the next token
         {
@@ -72,7 +70,9 @@ fn main() {
             break;
         }
 
-        let output_bytes = model.token_to_bytes(new_token_id, Special::Tokenize).unwrap();
+        let output_bytes = model
+            .token_to_bytes(new_token_id, Special::Tokenize)
+            .unwrap();
         // use `Decoder.decode_to_string()` to avoid the intermediate buffer
         let mut output_string = String::with_capacity(32);
         let _decode_result = decoder.decode_to_string(&output_bytes, &mut output_string, false);
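
The renamed example streams token bytes through an encoding_rs decoder (decode_to_string) rather than collecting them into an intermediate buffer first. Below is a minimal, self-contained sketch of that pattern, assuming encoding_rs is available as a dependency (the PR adds it under dev-dependencies); the byte values are made up for illustration. The decoder keeps state between calls, so a multi-byte UTF-8 character split across two token chunks still decodes correctly.

fn main() {
    let mut decoder = encoding_rs::UTF_8.new_decoder();
    let mut out = String::with_capacity(32);

    // "é" is 0xC3 0xA9 in UTF-8; feed it split across two calls, the way
    // token output from the model might arrive.
    let _ = decoder.decode_to_string(&[0xC3], &mut out, false);
    let _ = decoder.decode_to_string(&[0xA9], &mut out, false);

    assert_eq!(out, "é");
}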

examples/usage/Cargo.toml

Lines changed: 0 additions & 19 deletions
This file was deleted.

llama-cpp-2/Cargo.toml

Lines changed: 7 additions & 0 deletions
@@ -14,6 +14,9 @@ llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.69" }
 thiserror = { workspace = true }
 tracing = { workspace = true }
 
+[dev-dependencies]
+encoding_rs = { workspace = true }
+
 [features]
 cuda = ["llama-cpp-sys-2/cuda"]
 metal = ["llama-cpp-sys-2/metal"]
@@ -32,3 +35,7 @@ workspace = true
 
 [package.metadata.docs.rs]
 features = ["sampler"]
+
+[[example]]
+name = "usage"
+path = "../examples/usage.rs"

llama-cpp-2/src/context/sample/sampler.rs

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 //! like [`crate::context::LlamaContext`] or token history to the sampler.
 //!
 //! # Example
-//! 
+//!
 //! **Llama.cpp default sampler**
 //!
 //! ```rust

llama-cpp-2/src/model.rs

Lines changed: 6 additions & 2 deletions
@@ -1,6 +1,6 @@
 //! A safe wrapper around `llama_model`.
-use std::ffi::CString;
 use std::ffi::CStr;
+use std::ffi::CString;
 use std::num::NonZeroU16;
 use std::os::raw::c_int;
 use std::path::Path;
@@ -550,7 +550,11 @@ impl LlamaModel {
             if res > buff.len() as i32 {
                 return Err(ApplyChatTemplateError::BuffSizeError);
            }
-            Ok::<String, ApplyChatTemplateError>(CStr::from_ptr(buff.as_mut_ptr()).to_string_lossy().to_string())
+            Ok::<String, ApplyChatTemplateError>(
+                CStr::from_ptr(buff.as_mut_ptr())
+                    .to_string_lossy()
+                    .to_string(),
+            )
         }?;
         Ok(formatted_chat)
     }
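
The hunk above only reformats the existing conversion: the C string that was written into buff is copied out with a lossy UTF-8 conversion. A small safe-Rust sketch of that CStr-to-String pattern follows; the function and buffer are hypothetical, and the real code uses the unsafe CStr::from_ptr because it only has a raw buffer pointer.

use std::ffi::CStr;

// Hypothetical stand-in for the buffer handling above: read a NUL-terminated
// byte buffer and copy it into an owned String, replacing any invalid UTF-8
// with U+FFFD via to_string_lossy().
fn buffer_to_string(buff: &[u8]) -> String {
    CStr::from_bytes_until_nul(buff)
        .expect("buffer must contain a NUL terminator")
        .to_string_lossy()
        .to_string()
}

fn main() {
    let raw = b"formatted chat\0 unused trailing capacity";
    assert_eq!(buffer_to_string(raw), "formatted chat");
}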

llama-cpp-2/src/model/params/kv_overrides.rs

Lines changed: 2 additions & 6 deletions
@@ -33,17 +33,13 @@ impl ParamOverrideValue {
                 llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_bool: *value }
             }
             ParamOverrideValue::Float(value) => {
-                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 {
-                    val_f64: *value,
-                }
+                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_f64: *value }
             }
             ParamOverrideValue::Int(value) => {
                 llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_i64: *value }
             }
             ParamOverrideValue::Str(c_string) => {
-                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 {
-                    val_str: *c_string,
-                }
+                llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_str: *c_string }
             }
         }
     }
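
The reformatted arms all do the same thing: initialize exactly one field of the bindgen-generated C union. A minimal sketch of how that works in Rust, with KvOverride as a hypothetical stand-in for llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1:

// A bindgen-style C union is constructed by naming a single field; reading a
// field back is unsafe because Rust cannot track which variant was written last.
#[allow(dead_code)]
#[repr(C)]
union KvOverride {
    val_bool: bool,
    val_i64: i64,
    val_f64: f64,
}

fn main() {
    let v = KvOverride { val_f64: 0.5 };
    // Safe only because we read the same field we just wrote.
    let f = unsafe { v.val_f64 };
    assert_eq!(f, 0.5);
}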

llama-cpp-2/src/token_type.rs

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ impl TryFrom<llama_cpp_sys_2::llama_token_type> for LlamaTokenAttrs {
     type Error = LlamaTokenTypeFromIntError;
 
     fn try_from(value: llama_cpp_sys_2::llama_vocab_type) -> Result<Self, Self::Error> {
-        Ok(Self(BitFlags::from_bits(value).map_err(|e| {
+        Ok(Self(BitFlags::from_bits(value as _).map_err(|e| {
             LlamaTokenTypeFromIntError::UnknownValue(e.invalid_bits())
         })?))
     }
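
The only functional change in this file is the `value as _` cast. A minimal sketch of that inferred-cast idiom, with expects_u32 as a hypothetical stand-in for BitFlags::from_bits:

// `as _` leaves the target type of a numeric cast for the compiler to infer
// from the call site.
fn expects_u32(bits: u32) -> u32 {
    bits
}

fn main() {
    let value: i32 = 10;
    // The compiler infers `u32` from expects_u32's parameter, so this is
    // equivalent to writing `value as u32`.
    assert_eq!(expects_u32(value as _), 10);
}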

llama-cpp-sys-2/build.rs

Lines changed: 29 additions & 20 deletions
@@ -91,11 +91,9 @@ fn compile_bindings(
     llama_header_path: &Path,
 ) -> Result<(), Box<dyn std::error::Error + 'static>> {
     println!("Generating bindings..");
-
-    let includes = [
-        llama_header_path.join("ggml").join("include"),
-    ];
-
+
+    let includes = [llama_header_path.join("ggml").join("include")];
+
     let bindings = bindgen::Builder::default()
         .clang_args(includes.map(|path| format!("-I{}", path.to_string_lossy())))
         .header(
@@ -425,9 +423,7 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati
     // nvcc.flag("-Wno-pedantic");
     // }
 
-    for lib in [
-        "cuda", "cublas", "cudart", "cublasLt"
-    ] {
+    for lib in ["cuda", "cublas", "cudart", "cublasLt"] {
         println!("cargo:rustc-link-lib={}", lib);
     }
     if !nvcc.get_compiler().is_like_msvc() {
@@ -623,31 +619,44 @@ fn gen_vulkan_shaders(out_path: impl AsRef<Path>) -> (impl AsRef<Path>, impl AsR
         .cpp(true)
         .get_compiler();
 
-    assert!(!cxx.is_like_msvc(), "Compiling Vulkan GGML with MSVC is not supported at this time.");
+    assert!(
+        !cxx.is_like_msvc(),
+        "Compiling Vulkan GGML with MSVC is not supported at this time."
+    );
 
     let vulkan_shaders_gen_bin = out_path.as_ref().join("vulkan-shaders-gen");
 
     cxx.to_command()
         .args([
-            vulkan_shaders_src.join("vulkan-shaders-gen.cpp").as_os_str(),
-            "-o".as_ref(), vulkan_shaders_gen_bin.as_os_str()
+            vulkan_shaders_src
+                .join("vulkan-shaders-gen.cpp")
+                .as_os_str(),
+            "-o".as_ref(),
+            vulkan_shaders_gen_bin.as_os_str(),
         ])
-        .output().expect("Could not compile Vulkan shader generator");
+        .output()
+        .expect("Could not compile Vulkan shader generator");
 
     let header = out_path.as_ref().join("ggml-vulkan-shaders.hpp");
     let source = out_path.as_ref().join("ggml-vulkan-shaders.cpp");
 
     Command::new(vulkan_shaders_gen_bin)
         .args([
-            "--glslc".as_ref(), "glslc".as_ref(),
-            "--input-dir".as_ref(), vulkan_shaders_src.as_os_str(),
-            "--output-dir".as_ref(), out_path.as_ref().join("vulkan-shaders.spv").as_os_str(),
-            "--target-hpp".as_ref(), header.as_os_str(),
-            "--target-cpp".as_ref(), source.as_os_str(),
-            "--no-clean".as_ref()
+            "--glslc".as_ref(),
+            "glslc".as_ref(),
+            "--input-dir".as_ref(),
+            vulkan_shaders_src.as_os_str(),
+            "--output-dir".as_ref(),
+            out_path.as_ref().join("vulkan-shaders.spv").as_os_str(),
+            "--target-hpp".as_ref(),
+            header.as_os_str(),
+            "--target-cpp".as_ref(),
+            source.as_os_str(),
+            "--no-clean".as_ref(),
        ])
-        .output().expect("Could not run Vulkan shader generator");
-
+        .output()
+        .expect("Could not run Vulkan shader generator");
+
     (out_path, source)
 }
 
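
The reformatted gen_vulkan_shaders hunk keeps the same std::process::Command invocation, just one argument per line. Below is a minimal sketch of that argument-building pattern; the paths and binary are hypothetical, and unlike the build script it reports failure instead of aborting.

use std::path::Path;
use std::process::Command;

fn main() {
    let out_dir = Path::new("/tmp/out");
    let gen_bin = out_dir.join("vulkan-shaders-gen");

    let result = Command::new(&gen_bin)
        .args([
            // `.as_ref()` coerces the string literals to &OsStr so the array
            // has a single element type alongside the path arguments.
            "--glslc".as_ref(),
            "glslc".as_ref(),
            "--output-dir".as_ref(),
            out_dir.join("vulkan-shaders.spv").as_os_str(),
        ])
        .output();

    // The real build script calls .expect(..) and fails the build; here we
    // only report whether the hypothetical binary could be spawned.
    match result {
        Ok(output) => println!("generator exited with {}", output.status),
        Err(err) => eprintln!("could not run generator: {err}"),
    }
}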
